From 592a607bbc053bc6f614a0e619326009f4b3829e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 7 Feb 2008 14:29:43 +1100 Subject: [PATCH 01/12] [POWERPC] Disable G5 NAP mode during SMU commands on U3 It appears that with the U3 northbridge, if the processor is in NAP mode the whole time while waiting for an SMU command to complete, then the SMU will fail. It could be related to the weird backward mechanism the SMU uses to get to system memory via i2c to the northbridge that doesn't operate properly when the said bridge is in napping along with the CPU. That is on U3 at least, U4 doesn't seem to be affected. This didn't show before NO_HZ as the timer wakeup was enough to make it work it seems, but that is no longer the case. This fixes it by disabling NAP mode on those machines while an SMU command is in flight. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/powermac/feature.c | 11 +++++++++- drivers/macintosh/smu.c | 25 ++++++++++++++++++++++- include/asm-powerpc/pmac_feature.h | 8 ++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index ba931be2175c..5169ecc37123 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void) /* Locate core99 Uni-N */ uninorth_node = of_find_node_by_name(NULL, "uni-n"); + uninorth_maj = 1; + /* Locate G5 u3 */ if (uninorth_node == NULL) { uninorth_node = of_find_node_by_name(NULL, "u3"); @@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void) uninorth_node = of_find_node_by_name(NULL, "u4"); uninorth_maj = 4; } - if (uninorth_node == NULL) + if (uninorth_node == NULL) { + uninorth_maj = 0; return; + } addrp = of_get_property(uninorth_node, "reg", NULL); if (addrp == NULL) @@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev) pmac_agp_resume(pmac_agp_bridge); } EXPORT_SYMBOL(pmac_resume_agp_for_card); + +int pmac_get_uninorth_variant(void) +{ + return uninorth_maj; +} diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 8ba49385c3ff..77ad192962c5 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -85,6 +85,7 @@ struct smu_device { u32 cmd_buf_abs; /* command buffer absolute */ struct list_head cmd_list; struct smu_cmd *cmd_cur; /* pending command */ + int broken_nap; struct list_head cmd_i2c_list; struct smu_i2c_cmd *cmd_i2c_cur; /* pending i2c command */ struct timer_list i2c_timer; @@ -135,6 +136,19 @@ static void smu_start_cmd(void) fend = faddr + smu->cmd_buf->length + 2; flush_inval_dcache_range(faddr, fend); + + /* We also disable NAP mode for the duration of the command + * on U3 based machines. + * This is slightly racy as it can be written back to 1 by a sysctl + * but that never happens in practice. There seem to be an issue with + * U3 based machines such as the iMac G5 where napping for the + * whole duration of the command prevents the SMU from fetching it + * from memory. This might be related to the strange i2c based + * mechanism the SMU uses to access memory. + */ + if (smu->broken_nap) + powersave_nap = 0; + /* This isn't exactly a DMA mapping here, I suspect * the SMU is actually communicating with us via i2c to the * northbridge or the CPU to access RAM. @@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg) misc = cmd->misc; mb(); cmd->status = rc; + + /* Re-enable NAP mode */ + if (smu->broken_nap) + powersave_nap = 1; bail: /* Start next command if any */ smu_start_cmd(); @@ -461,7 +479,7 @@ int __init smu_init (void) if (np == NULL) return -ENODEV; - printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR); + printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR); if (smu_cmdbuf_abs == 0) { printk(KERN_ERR "SMU: Command buffer not allocated !\n"); @@ -533,6 +551,11 @@ int __init smu_init (void) goto fail; } + /* U3 has an issue with NAP mode when issuing SMU commands */ + smu->broken_nap = pmac_get_uninorth_variant() < 4; + if (smu->broken_nap) + printk(KERN_INFO "SMU: using NAP mode workaround\n"); + sys_ctrler = SYS_CTRLER_SMU; return 0; diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h index 26bcb0aa164a..877c35a4356e 100644 --- a/include/asm-powerpc/pmac_feature.h +++ b/include/asm-powerpc/pmac_feature.h @@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base; #define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v))) #define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v))) +/* Uninorth variant: + * + * 0 = not uninorth + * 1 = U1.x or U2.x + * 3 = U3 + * 4 = U4 + */ +extern int pmac_get_uninorth_variant(void); #endif /* __ASM_POWERPC_PMAC_FEATURE_H */ #endif /* __KERNEL__ */ From eebead5b8ff89340dc18ceec996157d0eb7d0287 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Feb 2008 15:50:41 +1100 Subject: [PATCH 02/12] [POWERPC] spufs: Fix state_mutex leaks Fix various state_mutex leaks. The worst one was introduced by the interrutible state_mutex conversion but there've been a few before too. Notably spufs_wait now returns without the state_mutex held when returning an error, which actually cleans up some code. Signed-off-by: Christoph Hellwig Signed-off-by: Luke Browning Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/fault.c | 12 ++---- arch/powerpc/platforms/cell/spufs/file.c | 49 ++++++++++++++--------- arch/powerpc/platforms/cell/spufs/run.c | 9 ++++- arch/powerpc/platforms/cell/spufs/spufs.h | 5 ++- 4 files changed, 45 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index eff4d291ba85..e46d300e21a5 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -108,7 +108,7 @@ int spufs_handle_class1(struct spu_context *ctx) u64 ea, dsisr, access; unsigned long flags; unsigned flt = 0; - int ret, ret2; + int ret; /* * dar and dsisr get passed from the registers @@ -148,13 +148,10 @@ int spufs_handle_class1(struct spu_context *ctx) ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); /* - * If spu_acquire fails due to a pending signal we just want to return - * EINTR to userspace even if that means missing the dma restart or - * updating the page fault statistics. + * This is nasty: we need the state_mutex for all the bookkeeping even + * if the syscall was interrupted by a signal. ewww. */ - ret2 = spu_acquire(ctx); - if (ret2) - goto out; + mutex_lock(&ctx->state_mutex); /* * Clear dsisr under ctxt lock after handling the fault, so that @@ -185,7 +182,6 @@ int spufs_handle_class1(struct spu_context *ctx) } else spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); - out: spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 1018acd1746b..e4ab9d5a86f0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -358,6 +358,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, { struct spu_context *ctx = vma->vm_file->private_data; unsigned long area, offset = address - vma->vm_start; + int ret = 0; spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx); @@ -379,7 +380,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, if (ctx->state == SPU_STATE_SAVED) { up_read(¤t->mm->mmap_sem); spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx); - spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu); down_read(¤t->mm->mmap_sem); } else { @@ -388,7 +389,8 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu); } - spu_release(ctx); + if (!ret) + spu_release(ctx); return NOPFN_REFAULT; } @@ -755,23 +757,25 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, count = spu_acquire(ctx); if (count) - return count; + goto out; /* wait only for the first element */ count = 0; if (file->f_flags & O_NONBLOCK) { - if (!spu_ibox_read(ctx, &ibox_data)) + if (!spu_ibox_read(ctx, &ibox_data)) { count = -EAGAIN; + goto out_unlock; + } } else { count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + if (count) + goto out; } - if (count) - goto out; /* if we can't write at all, return -EFAULT */ count = __put_user(ibox_data, udata); if (count) - goto out; + goto out_unlock; for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { int ret; @@ -788,9 +792,9 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, break; } -out: +out_unlock: spu_release(ctx); - +out: return count; } @@ -905,7 +909,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, count = spu_acquire(ctx); if (count) - return count; + goto out; /* * make sure we can at least write one element, by waiting @@ -913,14 +917,16 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, */ count = 0; if (file->f_flags & O_NONBLOCK) { - if (!spu_wbox_write(ctx, wbox_data)) + if (!spu_wbox_write(ctx, wbox_data)) { count = -EAGAIN; + goto out_unlock; + } } else { count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + if (count) + goto out; } - if (count) - goto out; /* write as much as possible */ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { @@ -934,8 +940,9 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, break; } -out: +out_unlock: spu_release(ctx); +out: return count; } @@ -1598,12 +1605,11 @@ static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, } else { ret = spufs_wait(ctx->mfc_wq, spufs_read_mfc_tagstatus(ctx, &status)); + if (ret) + goto out; } spu_release(ctx); - if (ret) - goto out; - ret = 4; if (copy_to_user(buffer, &status, 4)) ret = -EFAULT; @@ -1732,6 +1738,8 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, int status; ret = spufs_wait(ctx->mfc_wq, spu_send_mfc_command(ctx, cmd, &status)); + if (ret) + goto out; if (status) ret = status; } @@ -1785,7 +1793,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) ret = spu_acquire(ctx); if (ret) - return ret; + goto out; #if 0 /* this currently hangs */ ret = spufs_wait(ctx->mfc_wq, @@ -1794,12 +1802,13 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) goto out; ret = spufs_wait(ctx->mfc_wq, ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); -out: + if (ret) + goto out; #else ret = 0; #endif spu_release(ctx); - +out: return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index b4814c740d8a..e9c61a1a8f98 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -354,8 +354,15 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); - if (unlikely(ret)) + if (unlikely(ret)) { + /* + * This is nasty: we need the state_mutex for all the + * bookkeeping even if the syscall was interrupted by + * a signal. ewww. + */ + mutex_lock(&ctx->state_mutex); break; + } spu = ctx->spu; if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))) { diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 795a1b52538b..2c2fe3c07d72 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -268,6 +268,9 @@ extern char *isolated_loader; * Same as wait_event_interruptible(), except that here * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. + * + * Returns with state_mutex re-acquired when successfull or + * with -ERESTARTSYS and the state_mutex dropped when interrupted. */ #define spufs_wait(wq, condition) \ @@ -278,11 +281,11 @@ extern char *isolated_loader; prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ if (condition) \ break; \ + spu_release(ctx); \ if (signal_pending(current)) { \ __ret = -ERESTARTSYS; \ break; \ } \ - spu_release(ctx); \ schedule(); \ __ret = spu_acquire(ctx); \ if (__ret) \ From 732377c5f5335e227171c76532613f45b4067f25 Mon Sep 17 00:00:00 2001 From: Masato Noguchi Date: Fri, 8 Feb 2008 15:50:41 +1100 Subject: [PATCH 03/12] [POWERPC] spufs: Update SPU_Status[CISHP] in backing runcntl write Currently, the kernel may fail to restart a SPE context which has stopped and been swapped out. This changes spu_backing_runcntl_write to emulate the real SPU_Status register exactly. When the SPU Run Control register is written with SPU_RunCntl[Run] set to '1', the physical SPU automatically sets SPU_Status[R] and clears SPU_Status[CISHP]. Signed-off-by: Masato Noguchi Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/backing_ops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 50d98a154aaf..64eb15b22040 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -288,6 +288,12 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) spin_lock(&ctx->csa.register_lock); ctx->csa.prob.spu_runcntl_RW = val; if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R &= + ~SPU_STATUS_STOPPED_BY_STOP & + ~SPU_STATUS_STOPPED_BY_HALT & + ~SPU_STATUS_SINGLE_STEP & + ~SPU_STATUS_INVALID_INSTR & + ~SPU_STATUS_INVALID_CH; ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; } else { ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; From e66686b414f10f1ef2cd0aa77a03a67e17304773 Mon Sep 17 00:00:00 2001 From: Luke Browning Date: Fri, 8 Feb 2008 15:50:41 +1100 Subject: [PATCH 04/12] [POWERPC] spufs: No need to have a runnable SPU for libassist update We don't need to update the libassist statistic with the context in a runnable state, so do it after spu_disable_spu(). Signed-off-by: Luke Browning Signed-off-by: Jeremy Kerr Acked-by: Christoph Hellwig Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/run.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index e9c61a1a8f98..f401e51a7979 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -395,16 +395,14 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_SINGLE_STEP))); - if ((status & SPU_STATUS_STOPPED_BY_STOP) && - (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) && - (ctx->state == SPU_STATE_RUNNABLE)) - ctx->stats.libassist++; - - spu_disable_spu(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) + ctx->stats.libassist++; + if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || From 85687ff2b4eab47f4d637a0d3a482bb955d3cbd4 Mon Sep 17 00:00:00 2001 From: Luke Browning Date: Fri, 8 Feb 2008 15:50:41 +1100 Subject: [PATCH 05/12] [POWERPC] spufs: Fix timing dependent false return from spufs_run_spu Stop bits are only valid when the running bit is not set. Status bits carry over from one invocation of spufs_run_spu() to another, so the RUNNING bit gets added to the previous state of the register which may have been a remote library call. In this case, it looks like another library routine should be invoked, but the spe is actually running. This fixes a problem with a testcase that exercises the scheduler. Signed-off-by: Luke Browning Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index f401e51a7979..fca22e18069a 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -53,7 +53,7 @@ int spu_stopped(struct spu_context *ctx, u32 *stat) stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; - if (*stat & stopped) + if (!(*stat & SPU_STATUS_RUNNING) && (*stat & stopped)) return 1; dsisr = ctx->csa.dsisr; From ccd05d086f82dba2ab117dcaf4a38cbb2863a439 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 Feb 2008 16:37:02 +1100 Subject: [PATCH 06/12] [POWERPC] Fix cell IOMMU null pointer explosion on old firmwares The cell IOMMU fixed mapping support has a null pointer bug if you run it on older firmwares that don't contain the "dma-ranges" properties. Fix it and convert to using of_get_next_parent() while we're there. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/iommu.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index df330666ccc9..a276064471b3 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -789,18 +790,16 @@ static int __init cell_iommu_init_disabled(void) static u64 cell_iommu_get_fixed_address(struct device *dev) { u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR; - struct device_node *tmp, *np; + struct device_node *np; const u32 *ranges = NULL; int i, len, best; - np = dev->archdata.of_node; - of_node_get(np); - ranges = of_get_property(np, "dma-ranges", &len); - while (!ranges && np) { - tmp = of_get_parent(np); - of_node_put(np); - np = tmp; + np = of_node_get(dev->archdata.of_node); + while (np) { ranges = of_get_property(np, "dma-ranges", &len); + if (ranges) + break; + np = of_get_next_parent(np); } if (!ranges) { From 0e0b47abb71a2c4aed5895c01f41827dbd8a981c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 Feb 2008 16:37:03 +1100 Subject: [PATCH 07/12] [POWERPC] Don't enable cell IOMMU fixed mapping if there are no dma-ranges In order for the cell IOMMU fixed mapping to work we need "dma-ranges" properties in the device tree. If there are none then there's no point enabling the fixed mapping support. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index a276064471b3..1f7b25474086 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -917,6 +917,18 @@ static int __init cell_iommu_fixed_mapping_init(void) return -1; } + /* We must have dma-ranges properties for fixed mapping to work */ + for (np = NULL; (np = of_find_all_nodes(np));) { + if (of_find_property(np, "dma-ranges", NULL)) + break; + } + of_node_put(np); + + if (!np) { + pr_debug("iommu: no dma-ranges found, no fixed mapping\n"); + return -1; + } + /* The default setup is to have the fixed mapping sit after the * dynamic region, so find the top of the largest IOMMU window * on any axon, then add the size of RAM and that's our max value. From 4a8df1507eaeefc9739e3762db606caa08edba98 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 Feb 2008 16:37:04 +1100 Subject: [PATCH 08/12] [POWERPC] Fix potential cell IOMMU bug when switching back to default DMA ops If we get a 64-bit dma mask we switch to the fixed ops and call cell_dma_dev_setup(). If the driver then switches back to a 32-bit dma mask for any reason we don't call cell_dma_dev_setup() again, which has the potential to leave bogus data in dev->archdata.dma_data. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/iommu.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 1f7b25474086..5cdcd3638250 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -841,19 +841,18 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; - if (dma_mask == DMA_BIT_MASK(64)) { - if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR) - dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n"); - else { - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); - set_dma_ops(dev, &dma_iommu_fixed_ops); - cell_dma_dev_setup(dev); - } + if (dma_mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + { + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + set_dma_ops(dev, &dma_iommu_fixed_ops); } else { dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); set_dma_ops(dev, get_pci_dma_ops()); } + cell_dma_dev_setup(dev); + *dev->dma_mask = dma_mask; return 0; From 44621be4b563fbce32007ebfac91dfe8f5692743 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 Feb 2008 16:37:04 +1100 Subject: [PATCH 09/12] [POWERPC] Make cell IOMMU fixed mapping printk more useful Currently the cell IOMMU fixed mapping just printks that it's been setup, which is not particularly useful. Much more interesting is the address ranges for the different windows. This adds one line to dmesg on a blade. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 5cdcd3638250..edab631a8dcb 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -991,8 +991,8 @@ static int __init cell_iommu_fixed_mapping_init(void) dsize = htab_size_bytes; } - pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \ - "fixed window %lx-%lx\n", iommu->nid, dbase, + printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx " + "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase, dbase + dsize, fbase, fbase + fsize); cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize); @@ -1008,8 +1008,6 @@ static int __init cell_iommu_fixed_mapping_init(void) dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; set_pci_dma_ops(&dma_iommu_ops); - printk(KERN_DEBUG "IOMMU fixed mapping established.\n"); - return 0; } From aa620abe756207222d234f785d41435fac486e06 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Tue, 5 Feb 2008 00:10:16 -0800 Subject: [PATCH 10/12] [POWERPC] Add remove_memory() for 64-bit powerpc Supply remove_memory() function for 64-bit powerpc. This is still not quite complete as it needs to do some more arch-specific stuff, which will be added in a later patch. Signed-off-by: Badari Pulavarty Cc: Yasunori Goto Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 93a5c53e3423..f0a1fd268b7a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -129,6 +129,22 @@ int __devinit arch_add_memory(int nid, u64 start, u64 size) return __add_pages(zone, start_pfn, nr_pages); } +#ifdef CONFIG_MEMORY_HOTREMOVE +int remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn, end_pfn; + int ret; + + start_pfn = start >> PAGE_SHIFT; + end_pfn = start_pfn + (size >> PAGE_SHIFT); + ret = offline_pages(start_pfn, end_pfn, 120 * HZ); + if (ret) + goto out; + /* Arch-specific calls go here - next patch */ +out: + return ret; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ void show_mem(void) From 1482471d19e77d794012dbacaa65c44ceaae37bb Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Tue, 5 Feb 2008 00:10:17 -0800 Subject: [PATCH 11/12] [POWERPC] Enable hotplug memory remove for 64-bit powerpc Enable hotplug memory remove for ppc64. Signed-off-by: Badari Pulavarty Cc: Dave Hansen Cc: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8dcac0b22d68..d55063c021d5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -272,6 +272,9 @@ config HOTPLUG_CPU config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on (PPC_PRPMC2800 || PPC_MULTIPLATFORM) && EXPERIMENTAL From a99824f327c748b2753f4fa570eb1fefcd6a9c4d Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Tue, 5 Feb 2008 00:10:18 -0800 Subject: [PATCH 12/12] [POWERPC] Add arch-specific walk_memory_remove() for 64-bit powerpc walk_memory_resource() verifies if there are holes in a given memory range, by checking against /proc/iomem. On x86/ia64 system memory is represented in /proc/iomem. On powerpc, we don't show system memory as IO resource in /proc/iomem - instead it's maintained in /proc/device-tree. This provides a way for an architecture to provide its own walk_memory_resource() function. On powerpc, the memory region is small (16MB), contiguous and non-overlapping. So extra checking against the device-tree is not needed. Signed-off-by: Badari Pulavarty Acked-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: Benjamin Herrenschmidt Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 3 +++ arch/powerpc/mm/mem.c | 17 +++++++++++++++++ kernel/resource.c | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d55063c021d5..26b963c33c88 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -272,6 +272,9 @@ config HOTPLUG_CPU config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y +config ARCH_HAS_WALK_MEMORY + def_bool y + config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f0a1fd268b7a..be5c506779a7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -145,6 +145,23 @@ int remove_memory(u64 start, u64 size) return ret; } #endif /* CONFIG_MEMORY_HOTREMOVE */ + +/* + * walk_memory_resource() needs to make sure there is no holes in a given + * memory range. On PPC64, since this range comes from /sysfs, the range + * is guaranteed to be valid, non-overlapping and can not contain any + * holes. By the time we get here (memory add or remove), /proc/device-tree + * is updated and correct. Only reason we need to check against device-tree + * would be if we allow user-land to specify a memory range through a + * system call/ioctl etc. instead of doing offline/online through /sysfs. + */ +int +walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, + int (*func)(unsigned long, unsigned long, void *)) +{ + return (*func)(start_pfn, nr_pages, arg); +} + #endif /* CONFIG_MEMORY_HOTPLUG */ void show_mem(void) diff --git a/kernel/resource.c b/kernel/resource.c index 2eb553d9b517..82aea814d409 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -228,7 +228,7 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -#ifdef CONFIG_MEMORY_HOTPLUG +#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* * Finds the lowest memory reosurce exists within [res->start.res->end) * the caller must specify res->start, res->end, res->flags.