From 1d2f1f90a1e004b0c1b8a73ed4394a93f09104b3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Feb 2006 16:41:20 -0800 Subject: [PATCH] [SPARC64]: Sun4v cross-call sending support. Technically the hypervisor call supports sending in a list of all cpus to get the cross-call, but I only pass in one cpu at a time for now. The multi-cpu support is there, just ifdef'd out so it's easy to enable or delete it later. Signed-off-by: David S. Miller --- arch/sparc64/kernel/irq.c | 22 ++++++ arch/sparc64/kernel/smp.c | 125 +++++++++++++++++++++++++++++++++- arch/sparc64/kernel/traps.c | 6 +- include/asm-sparc64/cpudata.h | 14 +++- 4 files changed, 163 insertions(+), 4 deletions(-) diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index ff201c007e0c..c80d2531ec46 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -900,6 +900,24 @@ static void __cpuinit init_one_kbuf(unsigned long *pa_ptr) *pa_ptr = __pa(page); } +static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb) +{ +#ifdef CONFIG_SMP + unsigned long page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + page = get_zeroed_page(GFP_ATOMIC); + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + /* Allocate and init the mondo and error queues for this cpu. */ void __cpuinit sun4v_init_mondo_queues(void) { @@ -908,10 +926,14 @@ void __cpuinit sun4v_init_mondo_queues(void) init_one_mondo(&tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO); init_one_mondo(&tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO); + init_one_mondo(&tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR); init_one_kbuf(&tb->resum_kernel_buf_pa); + init_one_mondo(&tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR); init_one_kbuf(&tb->nonresum_kernel_buf_pa); + + init_cpu_send_mondo_info(tb); } /* Only invoked on boot processor. */ diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 223cc6bd369a..c10a3a8639e8 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -531,10 +531,133 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas } } +#if 0 +/* Multi-cpu list version. */ +static int init_cpu_list(u16 *list, cpumask_t mask) +{ + int i, cnt; + + cnt = 0; + for_each_cpu_mask(i, mask) + list[cnt++] = i; + + return cnt; +} + +static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask) +{ + int i; + + for (i = 0; i < orig_cnt; i++) { + if (list[i] == 0xffff) + cpu_clear(i, mask); + } + + return init_cpu_list(list, mask); +} + static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { - /* XXX implement me */ + int this_cpu = get_cpu(); + struct trap_per_cpu *tb = &trap_block[this_cpu]; + u64 *mondo = __va(tb->cpu_mondo_block_pa); + u16 *cpu_list = __va(tb->cpu_list_pa); + int cnt, retries; + + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + retries = 0; + cnt = init_cpu_list(cpu_list, mask); + do { + register unsigned long func __asm__("%o0"); + register unsigned long arg0 __asm__("%o1"); + register unsigned long arg1 __asm__("%o2"); + register unsigned long arg2 __asm__("%o3"); + + func = HV_FAST_CPU_MONDO_SEND; + arg0 = cnt; + arg1 = tb->cpu_list_pa; + arg2 = tb->cpu_mondo_block_pa; + + __asm__ __volatile__("ta %8" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2) + : "0" (func), "1" (arg0), + "2" (arg1), "3" (arg2), + "i" (HV_FAST_TRAP) + : "memory"); + if (likely(func == HV_EOK)) + break; + + if (unlikely(++retries > 100)) { + printk("CPU[%d]: sun4v mondo error %lu\n", + this_cpu, func); + break; + } + + cnt = update_cpu_list(cpu_list, cnt, mask); + + udelay(2 * cnt); + } while (1); + + put_cpu(); } +#else +/* Single-cpu list version. */ +static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) +{ + int this_cpu = get_cpu(); + struct trap_per_cpu *tb = &trap_block[this_cpu]; + u64 *mondo = __va(tb->cpu_mondo_block_pa); + u16 *cpu_list = __va(tb->cpu_list_pa); + int i; + + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + for_each_cpu_mask(i, mask) { + int retries = 0; + + do { + register unsigned long func __asm__("%o0"); + register unsigned long arg0 __asm__("%o1"); + register unsigned long arg1 __asm__("%o2"); + register unsigned long arg2 __asm__("%o3"); + + cpu_list[0] = i; + func = HV_FAST_CPU_MONDO_SEND; + arg0 = 1; + arg1 = tb->cpu_list_pa; + arg2 = tb->cpu_mondo_block_pa; + + __asm__ __volatile__("ta %8" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2) + : "0" (func), "1" (arg0), + "2" (arg1), "3" (arg2), + "i" (HV_FAST_TRAP) + : "memory"); + if (likely(func == HV_EOK)) + break; + + if (unlikely(++retries > 100)) { + printk("CPU[%d]: sun4v mondo error %lu\n", + this_cpu, func); + break; + } + + udelay(2 * i); + } while (1); + } + + put_cpu(); +} +#endif /* Send cross call to all processors mentioned in MASK * except self. diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 5417ff1b9345..ac171161e794 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2377,7 +2377,11 @@ void __init trap_init(void) (TRAP_PER_CPU_NONRESUM_KBUF_PA != offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || (TRAP_PER_CPU_FAULT_INFO != - offsetof(struct trap_per_cpu, fault_info))) + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa))) trap_per_cpu_offsets_are_bolixed_dave(); /* Attach to the address space of init_task. On SMP we diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 7f0a74ec47f6..338b0ca5b519 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -65,8 +65,16 @@ struct trap_per_cpu { unsigned long nonresum_mondo_pa; unsigned long nonresum_kernel_buf_pa; -/* Dcache lines 3 and 4: Hypervisor Fault Status */ +/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */ struct hv_fault_status fault_info; + +/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */ + unsigned long cpu_mondo_block_pa; + unsigned long cpu_list_pa; + unsigned long __pad1[2]; + +/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */ + unsigned long __pad2[4]; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(void); @@ -108,8 +116,10 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30 #define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38 #define TRAP_PER_CPU_FAULT_INFO 0x40 +#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0 +#define TRAP_PER_CPU_CPU_LIST_PA 0xc8 -#define TRAP_BLOCK_SZ_SHIFT 7 +#define TRAP_BLOCK_SZ_SHIFT 8 #include