[SPARC64]: Sun4v cross-call sending support.

Technically the hypervisor call supports sending in a list of all cpus to get the cross-call, but I only pass in one cpu at a time for now. The multi-cpu support is there, just ifdef'd out so it's easy to enable or delete it later. Signed-off-by: David S. Miller <davem@davemloft.net>
2006-02-08 16:41:20 -08:00 · 2006-02-08 16:41:20 -08:00 · 1d2f1f90a1
commit 1d2f1f90a1
parent 5b0c0572fc
4 changed files with 163 additions and 4 deletions
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@ -900,6 +900,24 @@ static void __cpuinit init_one_kbuf(unsigned long *pa_ptr)
 	*pa_ptr = __pa(page);
 }

+static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb)
+{
+#ifdef CONFIG_SMP
+	unsigned long page;
+
+	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+
+	page = get_zeroed_page(GFP_ATOMIC);
+	if (!page) {
+		prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+		prom_halt();
+	}
+
+	tb->cpu_mondo_block_pa = __pa(page);
+	tb->cpu_list_pa = __pa(page + 64);
+#endif
+}
+
 /* Allocate and init the mondo and error queues for this cpu.  */
 void __cpuinit sun4v_init_mondo_queues(void)
 {
@ -908,10 +926,14 @@ void __cpuinit sun4v_init_mondo_queues(void)

 	init_one_mondo(&tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO);
 	init_one_mondo(&tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO);
+
 	init_one_mondo(&tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR);
 	init_one_kbuf(&tb->resum_kernel_buf_pa);
+
 	init_one_mondo(&tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR);
 	init_one_kbuf(&tb->nonresum_kernel_buf_pa);
+
+	init_cpu_send_mondo_info(tb);
 }

 /* Only invoked on boot processor. */
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@ -531,10 +531,133 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
 	}
 }

+#if 0
+/* Multi-cpu list version.  */
+static int init_cpu_list(u16 *list, cpumask_t mask)
+{
+	int i, cnt;
+
+	cnt = 0;
+	for_each_cpu_mask(i, mask)
+		list[cnt++] = i;
+
+	return cnt;
+}
+
+static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask)
+{
+	int i;
+
+	for (i = 0; i < orig_cnt; i++) {
+		if (list[i] == 0xffff)
+			cpu_clear(i, mask);
+	}
+
+	return init_cpu_list(list, mask);
+}
+
 static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
-	/* XXX implement me */
+	int this_cpu = get_cpu();
+	struct trap_per_cpu *tb = &trap_block[this_cpu];
+	u64 *mondo = __va(tb->cpu_mondo_block_pa);
+	u16 *cpu_list = __va(tb->cpu_list_pa);
+	int cnt, retries;
+
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	retries = 0;
+	cnt = init_cpu_list(cpu_list, mask);
+	do {
+		register unsigned long func __asm__("%o0");
+		register unsigned long arg0 __asm__("%o1");
+		register unsigned long arg1 __asm__("%o2");
+		register unsigned long arg2 __asm__("%o3");
+
+		func = HV_FAST_CPU_MONDO_SEND;
+		arg0 = cnt;
+		arg1 = tb->cpu_list_pa;
+		arg2 = tb->cpu_mondo_block_pa;
+
+		__asm__ __volatile__("ta	%8"
+				     : "=&r" (func), "=&r" (arg0),
+				       "=&r" (arg1), "=&r" (arg2)
+				     : "0" (func), "1" (arg0),
+				       "2" (arg1), "3" (arg2),
+				       "i" (HV_FAST_TRAP)
+				     : "memory");
+		if (likely(func == HV_EOK))
+			break;
+
+		if (unlikely(++retries > 100)) {
+			printk("CPU[%d]: sun4v mondo error %lu\n",
+			       this_cpu, func);
+			break;
+		}
+
+		cnt = update_cpu_list(cpu_list, cnt, mask);
+
+		udelay(2 * cnt);
+	} while (1);
+
+	put_cpu();
 }
+#else
+/* Single-cpu list version.  */
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+{
+	int this_cpu = get_cpu();
+	struct trap_per_cpu *tb = &trap_block[this_cpu];
+	u64 *mondo = __va(tb->cpu_mondo_block_pa);
+	u16 *cpu_list = __va(tb->cpu_list_pa);
+	int i;
+
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	for_each_cpu_mask(i, mask) {
+		int retries = 0;
+
+		do {
+			register unsigned long func __asm__("%o0");
+			register unsigned long arg0 __asm__("%o1");
+			register unsigned long arg1 __asm__("%o2");
+			register unsigned long arg2 __asm__("%o3");
+
+			cpu_list[0] = i;
+			func = HV_FAST_CPU_MONDO_SEND;
+			arg0 = 1;
+			arg1 = tb->cpu_list_pa;
+			arg2 = tb->cpu_mondo_block_pa;
+
+			__asm__ __volatile__("ta	%8"
+					     : "=&r" (func), "=&r" (arg0),
+					       "=&r" (arg1), "=&r" (arg2)
+					     : "0" (func), "1" (arg0),
+					       "2" (arg1), "3" (arg2),
+					       "i" (HV_FAST_TRAP)
+					     : "memory");
+			if (likely(func == HV_EOK))
+				break;
+
+			if (unlikely(++retries > 100)) {
+				printk("CPU[%d]: sun4v mondo error %lu\n",
+				       this_cpu, func);
+				break;
+			}
+
+			udelay(2 * i);
+		} while (1);
+	}
+
+	put_cpu();
+}
+#endif

 /* Send cross call to all processors mentioned in MASK
 * except self.
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@ -2377,7 +2377,11 @@ void __init trap_init(void)
 	    (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
 	     offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
 	    (TRAP_PER_CPU_FAULT_INFO !=
-	     offsetof(struct trap_per_cpu, fault_info)))
+	     offsetof(struct trap_per_cpu, fault_info)) ||
+	    (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
+	     offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
+	    (TRAP_PER_CPU_CPU_LIST_PA !=
+	     offsetof(struct trap_per_cpu, cpu_list_pa)))
 		trap_per_cpu_offsets_are_bolixed_dave();

 	/* Attach to the address space of init_task.  On SMP we
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@ -65,8 +65,16 @@ struct trap_per_cpu {
 	unsigned long		nonresum_mondo_pa;
 	unsigned long		nonresum_kernel_buf_pa;

-/* Dcache lines 3 and 4: Hypervisor Fault Status */
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
 	struct hv_fault_status	fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list.  */
+	unsigned long		cpu_mondo_block_pa;
+	unsigned long		cpu_list_pa;
+	unsigned long		__pad1[2];
+
+/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size.  */
+	unsigned long		__pad2[4];
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(void);
@ -108,8 +116,10 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
 #define TRAP_PER_CPU_NONRESUM_MONDO_PA	0x30
 #define TRAP_PER_CPU_NONRESUM_KBUF_PA	0x38
 #define TRAP_PER_CPU_FAULT_INFO		0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA	0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA	0xc8

-#define TRAP_BLOCK_SZ_SHIFT		7
+#define TRAP_BLOCK_SZ_SHIFT		8

 #include <asm/scratchpad.h>