Merge branch 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

* 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
  [POWERPC] Add arch-specific walk_memory_remove() for 64-bit powerpc
  [POWERPC] Enable hotplug memory remove for 64-bit powerpc
  [POWERPC] Add remove_memory() for 64-bit powerpc
  [POWERPC] Make cell IOMMU fixed mapping printk more useful
  [POWERPC] Fix potential cell IOMMU bug when switching back to default DMA ops
  [POWERPC] Don't enable cell IOMMU fixed mapping if there are no dma-ranges
  [POWERPC] Fix cell IOMMU null pointer explosion on old firmwares
  [POWERPC] spufs: Fix timing dependent false return from spufs_run_spu
  [POWERPC] spufs: No need to have a runnable SPU for libassist update
  [POWERPC] spufs: Update SPU_Status[CISHP] in backing runcntl write
  [POWERPC] spufs: Fix state_mutex leaks
  [POWERPC] Disable G5 NAP mode during SMU commands on U3
This commit is contained in:
Linus Torvalds 2008-02-08 09:31:42 -08:00
commit dde0013782
12 changed files with 166 additions and 60 deletions

View file

@ -272,6 +272,12 @@ config HOTPLUG_CPU
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
config ARCH_HAS_WALK_MEMORY
def_bool y
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on (PPC_PRPMC2800 || PPC_MULTIPLATFORM) && EXPERIMENTAL

View file

@ -129,6 +129,39 @@ int __devinit arch_add_memory(int nid, u64 start, u64 size)
return __add_pages(zone, start_pfn, nr_pages);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * remove_memory - take the memory range starting at @start offline.
 * @start: first byte address of the range
 * @size:  length of the range in bytes
 *
 * Both values are converted to page frame numbers by shifting with
 * PAGE_SHIFT and the resulting [start_pfn, end_pfn) pair is handed to
 * offline_pages() together with 120 * HZ (presumably a timeout in
 * jiffies - confirm against offline_pages()).
 *
 * Returns the value produced by offline_pages().
 */
int remove_memory(u64 start, u64 size)
{
	unsigned long first_pfn = start >> PAGE_SHIFT;
	unsigned long last_pfn = first_pfn + (size >> PAGE_SHIFT);
	int rc;

	rc = offline_pages(first_pfn, last_pfn, 120 * HZ);
	if (rc)
		return rc;

	/* Arch-specific calls go here - next patch */
	return rc;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
/*
 * walk_memory_resource() needs to make sure there are no holes in a given
 * memory range. On PPC64 the range comes from /sysfs, so it is guaranteed
 * to be valid and non-overlapping, and it cannot contain any holes. By the
 * time we get here (memory add or remove), /proc/device-tree is updated
 * and correct. The only reason to check against the device-tree would be
 * if user-land were allowed to specify a memory range through a system
 * call/ioctl etc. instead of doing offline/online through /sysfs.
 *
 * Consequently no walking is done here: @func is invoked exactly once
 * with the full range and its return value is passed straight back.
 */
int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages,
			 void *arg,
			 int (*func)(unsigned long, unsigned long, void *))
{
	/* Hand the whole range to the callback in a single invocation. */
	return func(start_pfn, nr_pages, arg);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
void show_mem(void)

View file

@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <asm/prom.h>
@ -789,18 +790,16 @@ static int __init cell_iommu_init_disabled(void)
static u64 cell_iommu_get_fixed_address(struct device *dev)
{
u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR;
struct device_node *tmp, *np;
struct device_node *np;
const u32 *ranges = NULL;
int i, len, best;
np = dev->archdata.of_node;
of_node_get(np);
ranges = of_get_property(np, "dma-ranges", &len);
while (!ranges && np) {
tmp = of_get_parent(np);
of_node_put(np);
np = tmp;
np = of_node_get(dev->archdata.of_node);
while (np) {
ranges = of_get_property(np, "dma-ranges", &len);
if (ranges)
break;
np = of_get_next_parent(np);
}
if (!ranges) {
@ -842,19 +841,18 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
return -EIO;
if (dma_mask == DMA_BIT_MASK(64)) {
if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR)
dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n");
else {
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
set_dma_ops(dev, &dma_iommu_fixed_ops);
cell_dma_dev_setup(dev);
}
if (dma_mask == DMA_BIT_MASK(64) &&
cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
{
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
set_dma_ops(dev, &dma_iommu_fixed_ops);
} else {
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
set_dma_ops(dev, get_pci_dma_ops());
}
cell_dma_dev_setup(dev);
*dev->dma_mask = dma_mask;
return 0;
@ -918,6 +916,18 @@ static int __init cell_iommu_fixed_mapping_init(void)
return -1;
}
/* We must have dma-ranges properties for fixed mapping to work */
for (np = NULL; (np = of_find_all_nodes(np));) {
if (of_find_property(np, "dma-ranges", NULL))
break;
}
of_node_put(np);
if (!np) {
pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
return -1;
}
/* The default setup is to have the fixed mapping sit after the
* dynamic region, so find the top of the largest IOMMU window
* on any axon, then add the size of RAM and that's our max value.
@ -981,8 +991,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
dsize = htab_size_bytes;
}
pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \
"fixed window %lx-%lx\n", iommu->nid, dbase,
printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
dbase + dsize, fbase, fbase + fsize);
cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize);
@ -998,8 +1008,6 @@ static int __init cell_iommu_fixed_mapping_init(void)
dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
set_pci_dma_ops(&dma_iommu_ops);
printk(KERN_DEBUG "IOMMU fixed mapping established.\n");
return 0;
}

View file

@ -288,6 +288,12 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
spin_lock(&ctx->csa.register_lock);
ctx->csa.prob.spu_runcntl_RW = val;
if (val & SPU_RUNCNTL_RUNNABLE) {
ctx->csa.prob.spu_status_R &=
~SPU_STATUS_STOPPED_BY_STOP &
~SPU_STATUS_STOPPED_BY_HALT &
~SPU_STATUS_SINGLE_STEP &
~SPU_STATUS_INVALID_INSTR &
~SPU_STATUS_INVALID_CH;
ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
} else {
ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;

View file

@ -108,7 +108,7 @@ int spufs_handle_class1(struct spu_context *ctx)
u64 ea, dsisr, access;
unsigned long flags;
unsigned flt = 0;
int ret, ret2;
int ret;
/*
* dar and dsisr get passed from the registers
@ -148,13 +148,10 @@ int spufs_handle_class1(struct spu_context *ctx)
ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
/*
* If spu_acquire fails due to a pending signal we just want to return
* EINTR to userspace even if that means missing the dma restart or
* updating the page fault statistics.
* This is nasty: we need the state_mutex for all the bookkeeping even
* if the syscall was interrupted by a signal. ewww.
*/
ret2 = spu_acquire(ctx);
if (ret2)
goto out;
mutex_lock(&ctx->state_mutex);
/*
* Clear dsisr under ctxt lock after handling the fault, so that
@ -185,7 +182,6 @@ int spufs_handle_class1(struct spu_context *ctx)
} else
spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
out:
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
return ret;
}

View file

@ -358,6 +358,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
{
struct spu_context *ctx = vma->vm_file->private_data;
unsigned long area, offset = address - vma->vm_start;
int ret = 0;
spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx);
@ -379,7 +380,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
if (ctx->state == SPU_STATE_SAVED) {
up_read(&current->mm->mmap_sem);
spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu);
down_read(&current->mm->mmap_sem);
} else {
@ -388,7 +389,8 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu);
}
spu_release(ctx);
if (!ret)
spu_release(ctx);
return NOPFN_REFAULT;
}
@ -755,23 +757,25 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
count = spu_acquire(ctx);
if (count)
return count;
goto out;
/* wait only for the first element */
count = 0;
if (file->f_flags & O_NONBLOCK) {
if (!spu_ibox_read(ctx, &ibox_data))
if (!spu_ibox_read(ctx, &ibox_data)) {
count = -EAGAIN;
goto out_unlock;
}
} else {
count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
if (count)
goto out;
}
if (count)
goto out;
/* if we can't write at all, return -EFAULT */
count = __put_user(ibox_data, udata);
if (count)
goto out;
goto out_unlock;
for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
int ret;
@ -788,9 +792,9 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
break;
}
out:
out_unlock:
spu_release(ctx);
out:
return count;
}
@ -905,7 +909,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
count = spu_acquire(ctx);
if (count)
return count;
goto out;
/*
* make sure we can at least write one element, by waiting
@ -913,14 +917,16 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
*/
count = 0;
if (file->f_flags & O_NONBLOCK) {
if (!spu_wbox_write(ctx, wbox_data))
if (!spu_wbox_write(ctx, wbox_data)) {
count = -EAGAIN;
goto out_unlock;
}
} else {
count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
if (count)
goto out;
}
if (count)
goto out;
/* write as much as possible */
for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
@ -934,8 +940,9 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
break;
}
out:
out_unlock:
spu_release(ctx);
out:
return count;
}
@ -1598,12 +1605,11 @@ static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
} else {
ret = spufs_wait(ctx->mfc_wq,
spufs_read_mfc_tagstatus(ctx, &status));
if (ret)
goto out;
}
spu_release(ctx);
if (ret)
goto out;
ret = 4;
if (copy_to_user(buffer, &status, 4))
ret = -EFAULT;
@ -1732,6 +1738,8 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
int status;
ret = spufs_wait(ctx->mfc_wq,
spu_send_mfc_command(ctx, cmd, &status));
if (ret)
goto out;
if (status)
ret = status;
}
@ -1785,7 +1793,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
ret = spu_acquire(ctx);
if (ret)
return ret;
goto out;
#if 0
/* this currently hangs */
ret = spufs_wait(ctx->mfc_wq,
@ -1794,12 +1802,13 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
goto out;
ret = spufs_wait(ctx->mfc_wq,
ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
out:
if (ret)
goto out;
#else
ret = 0;
#endif
spu_release(ctx);
out:
return ret;
}

View file

@ -53,7 +53,7 @@ int spu_stopped(struct spu_context *ctx, u32 *stat)
stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
if (*stat & stopped)
if (!(*stat & SPU_STATUS_RUNNING) && (*stat & stopped))
return 1;
dsisr = ctx->csa.dsisr;
@ -354,8 +354,15 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
do {
ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
if (unlikely(ret))
if (unlikely(ret)) {
/*
* This is nasty: we need the state_mutex for all the
* bookkeeping even if the syscall was interrupted by
* a signal. ewww.
*/
mutex_lock(&ctx->state_mutex);
break;
}
spu = ctx->spu;
if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
&ctx->sched_flags))) {
@ -388,16 +395,14 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
SPU_STATUS_STOPPED_BY_HALT |
SPU_STATUS_SINGLE_STEP)));
if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
(((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) &&
(ctx->state == SPU_STATE_RUNNABLE))
ctx->stats.libassist++;
spu_disable_spu(ctx);
ret = spu_run_fini(ctx, npc, &status);
spu_yield(ctx);
if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
(((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
ctx->stats.libassist++;
if ((ret == 0) ||
((ret == -ERESTARTSYS) &&
((status & SPU_STATUS_STOPPED_BY_HALT) ||

View file

@ -268,6 +268,9 @@ extern char *isolated_loader;
* Same as wait_event_interruptible(), except that here
* we need to call spu_release(ctx) before sleeping, and
* then spu_acquire(ctx) when awoken.
*
* Returns with state_mutex re-acquired when successful or
* with -ERESTARTSYS and the state_mutex dropped when interrupted.
*/
#define spufs_wait(wq, condition) \
@ -278,11 +281,11 @@ extern char *isolated_loader;
prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \
if (condition) \
break; \
spu_release(ctx); \
if (signal_pending(current)) { \
__ret = -ERESTARTSYS; \
break; \
} \
spu_release(ctx); \
schedule(); \
__ret = spu_acquire(ctx); \
if (__ret) \

View file

@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void)
/* Locate core99 Uni-N */
uninorth_node = of_find_node_by_name(NULL, "uni-n");
uninorth_maj = 1;
/* Locate G5 u3 */
if (uninorth_node == NULL) {
uninorth_node = of_find_node_by_name(NULL, "u3");
@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void)
uninorth_node = of_find_node_by_name(NULL, "u4");
uninorth_maj = 4;
}
if (uninorth_node == NULL)
if (uninorth_node == NULL) {
uninorth_maj = 0;
return;
}
addrp = of_get_property(uninorth_node, "reg", NULL);
if (addrp == NULL)
@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev)
pmac_agp_resume(pmac_agp_bridge);
}
EXPORT_SYMBOL(pmac_resume_agp_for_card);
/*
 * Report the Uninorth variant stored in uninorth_maj.
 *
 * The value is returned unchanged. The encoding documented alongside the
 * prototype is: 0 = not uninorth, 1 = U1.x or U2.x, 3 = U3, 4 = U4.
 */
int pmac_get_uninorth_variant(void)
{
return uninorth_maj;
}

View file

@ -85,6 +85,7 @@ struct smu_device {
u32 cmd_buf_abs; /* command buffer absolute */
struct list_head cmd_list;
struct smu_cmd *cmd_cur; /* pending command */
int broken_nap;
struct list_head cmd_i2c_list;
struct smu_i2c_cmd *cmd_i2c_cur; /* pending i2c command */
struct timer_list i2c_timer;
@ -135,6 +136,19 @@ static void smu_start_cmd(void)
fend = faddr + smu->cmd_buf->length + 2;
flush_inval_dcache_range(faddr, fend);
/* We also disable NAP mode for the duration of the command
* on U3 based machines.
* This is slightly racy as it can be written back to 1 by a sysctl
* but that never happens in practice. There seems to be an issue with
* U3 based machines such as the iMac G5 where napping for the
* whole duration of the command prevents the SMU from fetching it
* from memory. This might be related to the strange i2c based
* mechanism the SMU uses to access memory.
*/
if (smu->broken_nap)
powersave_nap = 0;
/* This isn't exactly a DMA mapping here, I suspect
* the SMU is actually communicating with us via i2c to the
* northbridge or the CPU to access RAM.
@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
misc = cmd->misc;
mb();
cmd->status = rc;
/* Re-enable NAP mode */
if (smu->broken_nap)
powersave_nap = 1;
bail:
/* Start next command if any */
smu_start_cmd();
@ -461,7 +479,7 @@ int __init smu_init (void)
if (np == NULL)
return -ENODEV;
printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
if (smu_cmdbuf_abs == 0) {
printk(KERN_ERR "SMU: Command buffer not allocated !\n");
@ -533,6 +551,11 @@ int __init smu_init (void)
goto fail;
}
/* U3 has an issue with NAP mode when issuing SMU commands */
smu->broken_nap = pmac_get_uninorth_variant() < 4;
if (smu->broken_nap)
printk(KERN_INFO "SMU: using NAP mode workaround\n");
sys_ctrler = SYS_CTRLER_SMU;
return 0;

View file

@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base;
#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v)))
#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v)))
/* Uninorth variant:
*
* 0 = not uninorth
* 1 = U1.x or U2.x
* 3 = U3
* 4 = U4
*/
extern int pmac_get_uninorth_variant(void);
#endif /* __ASM_POWERPC_PMAC_FEATURE_H */
#endif /* __KERNEL__ */

View file

@ -228,7 +228,7 @@ int release_resource(struct resource *old)
EXPORT_SYMBOL(release_resource);
#ifdef CONFIG_MEMORY_HOTPLUG
#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
/*
* Finds the lowest memory resource that exists within [res->start, res->end).
* the caller must specify res->start, res->end, res->flags.