From 254be490f257fc3f76ca5f869ac8d107b3827025 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 27 Aug 2009 01:45:39 +0000 Subject: [PATCH 01/17] hvc_console: Provide (un)locked version for hvc_resize() Rename the locking free hvc_resize() function to __hvc_resize() and provide an inline function that locks the hvc_struct and calls __hvc_resize(). The rationale for this patch is that virtio_console calls the hvc_resize() function without locking the hvc_struct. So it needs to call the lock itself. According to naming rules, the unlocked version is renamed and prefixed with "__". References to unlocked function calls in hvc back-ends has been updated. Signed-off-by: Hendrik Brueckner Acked-by: Christian Borntraeger Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_console.c | 6 +++--- drivers/char/hvc_console.h | 12 +++++++++++- drivers/char/hvc_iucv.c | 4 +++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 25ce15bb1c08..a632f25f144a 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -678,7 +678,7 @@ int hvc_poll(struct hvc_struct *hp) EXPORT_SYMBOL_GPL(hvc_poll); /** - * hvc_resize() - Update terminal window size information. + * __hvc_resize() - Update terminal window size information. * @hp: HVC console pointer * @ws: Terminal window size structure * @@ -687,12 +687,12 @@ EXPORT_SYMBOL_GPL(hvc_poll); * * Locking: Locking free; the function MUST be called holding hp->lock */ -void hvc_resize(struct hvc_struct *hp, struct winsize ws) +void __hvc_resize(struct hvc_struct *hp, struct winsize ws) { hp->ws = ws; schedule_work(&hp->tty_resize); } -EXPORT_SYMBOL_GPL(hvc_resize); +EXPORT_SYMBOL_GPL(__hvc_resize); /* * This kthread is either polling or interrupt driven. This is determined by diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 3c85d78c975c..10950ca706d8 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -28,6 +28,7 @@ #define HVC_CONSOLE_H #include #include +#include /* * This is the max number of console adapters that can/will be found as @@ -88,7 +89,16 @@ int hvc_poll(struct hvc_struct *hp); void hvc_kick(void); /* Resize hvc tty terminal window */ -extern void hvc_resize(struct hvc_struct *hp, struct winsize ws); +extern void __hvc_resize(struct hvc_struct *hp, struct winsize ws); + +static inline void hvc_resize(struct hvc_struct *hp, struct winsize ws) +{ + unsigned long flags; + + spin_lock_irqsave(&hp->lock, flags); + __hvc_resize(hp, ws); + spin_unlock_irqrestore(&hp->lock, flags); +} /* default notifier for irq based notification */ extern int notifier_add_irq(struct hvc_struct *hp, int data); diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index 0ecac7e532f6..b8a5d654d3d0 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -273,7 +273,9 @@ static int hvc_iucv_write(struct hvc_iucv_private *priv, case MSG_TYPE_WINSIZE: if (rb->mbuf->datalen != sizeof(struct winsize)) break; - hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data)); + /* The caller must ensure that the hvc is locked, which + * is the case when called from hvc_iucv_get_chars() */ + __hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data)); break; case MSG_TYPE_ERROR: /* ignored ... */ From 0f3372741f2de8dc85a60be737b519a47b395b85 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Wed, 9 Sep 2009 05:02:24 +0000 Subject: [PATCH 02/17] powerpc: kmalloc failure ignored in vio_build_iommu_table() Prevent NULL dereference if kmalloc() fails. Signed-off-by: Roel Kluin Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index bc7b41edbdfc..1b4f674ad7c4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1054,6 +1054,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) return NULL; tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); + if (tbl == NULL) + return NULL; of_parse_dma_window(dev->dev.archdata.of_node, dma_window, &tbl->it_index, &offset, &size); From ad08587e5df17e192a57437bfedaba125998de25 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 12 Sep 2009 16:08:08 +0000 Subject: [PATCH 03/17] powerpc/pmc: Don't access lppaca on Book3E It doesn't exist ! Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index ccc68b50d05d..5a9ede4962cb 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -29,7 +29,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); void ppc_enable_pmcs(void); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include static inline void ppc_set_pmu_inuse(int inuse) From 144ef909c09b60c97b3c20b69ea30abd1e60e54d Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Mon, 14 Sep 2009 19:57:02 +0000 Subject: [PATCH 04/17] powerpc: Check for unsupported relocs when using CONFIG_RELOCATABLE When using CONFIG_RELOCATABLE, we build the kernel as a position independent executable. The kernel then uses a little bit of relocation code to relocate itself. That code only deals with R_PPC64_RELATIVE relocations though. If for some reason you use assembly constructs such as LOAD_REG_IMMEDIATE() to load the address of a symbol, you'll generate different kinds of relocations that won't be processed properly and bad things will happen. (We have 2 such bugs today). The perl script tries to filter out "known" bad ones. It's possible that we are missing some in the case of a weak function that nobody implements, we'll see if we get false positive and fix it. Signed-off-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Makefile | 11 +++++++ arch/powerpc/relocs_check.pl | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100755 arch/powerpc/relocs_check.pl diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index aacf629c1a9f..1a54a3b3a3fa 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -164,6 +164,17 @@ PHONY += $(BOOT_TARGETS) boot := arch/$(ARCH)/boot +ifeq ($(CONFIG_RELOCATABLE),y) +quiet_cmd_relocs_check = CALL $< + cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux" + +PHONY += relocs_check +relocs_check: arch/powerpc/relocs_check.pl vmlinux + $(call cmd,relocs_check) + +zImage: relocs_check +endif + $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl new file mode 100755 index 000000000000..d2571096c3e9 --- /dev/null +++ b/arch/powerpc/relocs_check.pl @@ -0,0 +1,56 @@ +#!/usr/bin/perl + +# Copyright © 2009 IBM Corporation + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks the relcoations of a vmlinux for "suspicious" +# relocations. + +use strict; +use warnings; + +if ($#ARGV != 1) { + die "$0 [path to objdump] [path to vmlinux]\n"; +} + +# Have Kbuild supply the path to objdump so we handle cross compilation. +my $objdump = shift; +my $vmlinux = shift; +my $bad_relocs_count = 0; +my $bad_relocs = ""; +my $old_binutils = 0; + +open(FD, "$objdump -R $vmlinux|") or die; +while () { + study $_; + + # Only look at relcoation lines. + next if (!/\s+R_/); + + # These relocations are okay + next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or + /R_PPC64_ADDR64\s+mach_/); + + # If we see this type of relcoation it's an idication that + # we /may/ be using an old version of binutils. + if (/R_PPC64_UADDR64/) { + $old_binutils++; + } + + $bad_relocs_count++; + $bad_relocs .= $_; +} + +if ($bad_relocs_count) { + print "WARNING: $bad_relocs_count bad relocations\n"; + print $bad_relocs; +} + +if ($old_binutils) { + print "WARNING: You need at binutils >= 2.19 to build a ". + "CONFIG_RELCOATABLE kernel\n"; +} From 5c8f382c0b96aedcd709c05eae13bd684e16417e Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 16 Sep 2009 03:08:58 +0000 Subject: [PATCH 05/17] powerpc/book3e-64: Remove duplicated #include Remove duplicated #include('s) in arch/powerpc/kernel/exceptions-64e.S Signed-off-by: Huang Weiyi Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9048f96237f6..24dcc0ecf246 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include From b9eceb2307f8dda124669a9dc213aad8c1569b5a Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 16 Sep 2009 03:09:22 +0000 Subject: [PATCH 06/17] powerpc/mm: Remove duplicated #include Remove duplicated #include('s) in arch/powerpc/mm/tlb_low_64e.S Signed-off-by: Huang Weiyi Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/tlb_low_64e.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index ef1cccf71173..f288279e679d 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -18,7 +18,6 @@ #include #include #include -#include #include #include From 1cebd7a0f62804ca24f7b7b35e8105000b9e879a Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 21 Sep 2009 08:26:34 +0000 Subject: [PATCH 07/17] powerpc: Rename get_dma_direct_offset get_dma_offset The former is no longer really accurate with the swiotlb case now a possibility. I also move it into dma-mapping.h - it no longer needs to be in dma.c, and there are about to be some more accessors that should all end up in the same place. A comment is added to indicate that this function is not used in configs where there is no simple dma offset, such as the iommu case. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/dma-mapping.h | 21 ++++++++++++++++++--- arch/powerpc/kernel/dma.c | 15 ++++----------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index cb2ca41dd526..34b919fe8cda 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,6 @@ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size, extern void dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -extern unsigned long get_dma_direct_offset(struct device *dev); #ifdef CONFIG_NOT_COHERENT_CACHE /* @@ -90,6 +89,22 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) dev->archdata.dma_ops = ops; } +/* + * get_dma_offset() + * + * Get the dma offset on configurations where the dma address can be determined + * from the physical address by looking at a simple offset. Direct dma and + * swiotlb use this function, but it is typically not used by implementations + * with an iommu. + */ +static inline unsigned long get_dma_offset(struct device *dev) +{ + if (dev) + return (unsigned long)dev->archdata.dma_data; + + return PCI_DRAM_OFFSET; +} + /* this will be removed soon */ #define flush_write_buffers() @@ -181,12 +196,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return paddr + get_dma_direct_offset(dev); + return paddr + get_dma_offset(dev); } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { - return daddr - get_dma_direct_offset(dev); + return daddr - get_dma_offset(dev); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 21b784d7e7d0..6215062caf8c 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -21,13 +21,6 @@ * default the offset is PCI_DRAM_OFFSET. */ -unsigned long get_dma_direct_offset(struct device *dev) -{ - if (dev) - return (unsigned long)dev->archdata.dma_data; - - return PCI_DRAM_OFFSET; -} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) @@ -37,7 +30,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; - *dma_handle += get_dma_direct_offset(dev); + *dma_handle += get_dma_offset(dev); return ret; #else struct page *page; @@ -51,7 +44,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, return NULL; ret = page_address(page); memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); + *dma_handle = virt_to_abs(ret) + get_dma_offset(dev); return ret; #endif @@ -75,7 +68,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int i; for_each_sg(sgl, sg, nents, i) { - sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); + sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -110,7 +103,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, { BUG_ON(dir == DMA_NONE); __dma_sync_page(page, offset, size, dir); - return page_to_phys(page) + offset + get_dma_direct_offset(dev); + return page_to_phys(page) + offset + get_dma_offset(dev); } static inline void dma_direct_unmap_page(struct device *dev, From 738ef42e32fe95553a424c04016b936c9f6c9afb Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 21 Sep 2009 08:26:35 +0000 Subject: [PATCH 08/17] powerpc: Change archdata dma_data to a union Sometimes this is used to hold a simple offset, and sometimes it is used to hold a pointer. This patch changes it to a union containing void * and dma_addr_t. get/set accessors are also provided, because it was getting a bit ugly to get to the actual data. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/device.h | 11 ++++++++++- arch/powerpc/include/asm/dma-mapping.h | 10 ++++++++-- arch/powerpc/include/asm/iommu.h | 10 ++++++++++ arch/powerpc/kernel/dma-iommu.c | 16 ++++++++-------- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/vio.c | 2 +- arch/powerpc/platforms/cell/beat_iommu.c | 2 +- arch/powerpc/platforms/cell/iommu.c | 9 +++------ arch/powerpc/platforms/iseries/iommu.c | 2 +- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 8 ++++---- arch/powerpc/sysdev/dart_iommu.c | 2 +- 12 files changed, 49 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 9dade15d1ab4..6d94d27ed850 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -15,7 +15,16 @@ struct dev_archdata { /* DMA operations on that device */ struct dma_map_ops *dma_ops; - void *dma_data; + + /* + * When an iommu is in use, dma_data is used as a ptr to the base of the + * iommu_table. Otherwise, it is a simple numerical offset. + */ + union { + dma_addr_t dma_offset; + void *iommu_table_base; + } dma_data; + #ifdef CONFIG_SWIOTLB dma_addr_t max_direct_dma_addr; #endif diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 34b919fe8cda..e281daebddca 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -97,14 +97,20 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) * swiotlb use this function, but it is typically not used by implementations * with an iommu. */ -static inline unsigned long get_dma_offset(struct device *dev) +static inline dma_addr_t get_dma_offset(struct device *dev) { if (dev) - return (unsigned long)dev->archdata.dma_data; + return dev->archdata.dma_data.dma_offset; return PCI_DRAM_OFFSET; } +static inline void set_dma_offset(struct device *dev, dma_addr_t off) +{ + if (dev) + dev->archdata.dma_data.dma_offset = off; +} + /* this will be removed soon */ #define flush_write_buffers() diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7464c0daddd1..edfc9803ec91 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -70,6 +70,16 @@ struct iommu_table { struct scatterlist; +static inline void set_iommu_table_base(struct device *dev, void *base) +{ + dev->archdata.dma_data.iommu_table_base = base; +} + +static inline void *get_iommu_table_base(struct device *dev) +{ + return dev->archdata.dma_data.iommu_table_base; +} + /* Frees table for an individual device node */ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 87ddb3fb948c..37771a518119 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -18,7 +18,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, + return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, device_to_mask(dev), flag, dev_to_node(dev)); } @@ -26,7 +26,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } /* Creates TCEs for a user provided buffer. The user buffer must be @@ -39,8 +39,8 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size, - device_to_mask(dev), direction, attrs); + return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, + size, device_to_mask(dev), direction, attrs); } @@ -48,7 +48,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction, + iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); } @@ -57,7 +57,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, + return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); } @@ -65,14 +65,14 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, + iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); } /* We support DMA to/from any memory page via the iommu */ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { - struct iommu_table *tbl = dev->archdata.dma_data; + struct iommu_table *tbl = get_iommu_table_base(dev); if (!tbl || tbl->it_offset > mask) { printk(KERN_INFO diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e9f4840096b3..bb8209e34931 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1117,7 +1117,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) /* Hook up default DMA ops */ sd->dma_ops = pci_dma_ops; - sd->dma_data = (void *)PCI_DRAM_OFFSET; + set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ if (ppc_md.pci_dma_dev_setup) diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 1b4f674ad7c4..77f64218abf3 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1235,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) vio_cmo_set_dma_ops(viodev); else viodev->dev.archdata.dma_ops = &dma_iommu_ops; - viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); + set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); /* init generic 'struct device' fields: */ diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c index 93b0efddd658..39d361c5c6d2 100644 --- a/arch/powerpc/platforms/cell/beat_iommu.c +++ b/arch/powerpc/platforms/cell/beat_iommu.c @@ -77,7 +77,7 @@ static void __init celleb_init_direct_mapping(void) static void celleb_dma_dev_setup(struct device *dev) { dev->archdata.dma_ops = get_pci_dma_ops(); - dev->archdata.dma_data = (void *)celleb_dma_direct_offset; + set_dma_offset(dev, celleb_dma_direct_offset); } static void celleb_pci_dma_dev_setup(struct pci_dev *pdev) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 416db17eb18f..ca5bfdfe47f2 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -657,15 +657,13 @@ static void cell_dma_dev_setup_fixed(struct device *dev); static void cell_dma_dev_setup(struct device *dev) { - struct dev_archdata *archdata = &dev->archdata; - /* Order is important here, these are not mutually exclusive */ if (get_dma_ops(dev) == &dma_iommu_fixed_ops) cell_dma_dev_setup_fixed(dev); else if (get_pci_dma_ops() == &dma_iommu_ops) - archdata->dma_data = cell_get_iommu_table(dev); + set_iommu_table_base(dev, cell_get_iommu_table(dev)); else if (get_pci_dma_ops() == &dma_direct_ops) - archdata->dma_data = (void *)cell_dma_direct_offset; + set_dma_offset(dev, cell_dma_direct_offset); else BUG(); } @@ -973,11 +971,10 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) static void cell_dma_dev_setup_fixed(struct device *dev) { - struct dev_archdata *archdata = &dev->archdata; u64 addr; addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; - archdata->dma_data = (void *)addr; + set_dma_offset(dev, addr); dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); } diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index 6c1e1011959e..9d53cb481a7c 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -193,7 +193,7 @@ static void pci_dma_dev_setup_iseries(struct pci_dev *pdev) pdn->iommu_table = iommu_init_table(tbl, -1); else kfree(tbl); - pdev->dev.archdata.dma_data = pdn->iommu_table; + set_iommu_table_base(&pdev->dev, pdn->iommu_table); } #else #define pci_dma_dev_setup_iseries NULL diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index a0ff03a3d8da..7b1d608ea3c8 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -189,7 +189,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) } #endif - dev->dev.archdata.dma_data = &iommu_table_iobmap; + set_iommu_table_base(&dev->dev, &iommu_table_iobmap); } static void pci_dma_bus_setup_null(struct pci_bus *b) { } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 661c8e02bcba..1a0000a4b6d6 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -482,7 +482,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) phb->node); iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); return; } @@ -494,7 +494,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) dn = dn->parent; if (dn && PCI_DN(dn)) - dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); @@ -538,7 +538,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) */ if (dma_window == NULL || pdn->parent == NULL) { pr_debug(" no dma window for device, linking to parent\n"); - dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table); return; } @@ -554,7 +554,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - dev->dev.archdata.dma_data = pci->iommu_table; + set_iommu_table_base(&dev->dev, pci->iommu_table); } #else /* CONFIG_PCI */ #define pci_dma_bus_setup_pSeries NULL diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 89639ecbf381..ae3c4db86fe8 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -297,7 +297,7 @@ static void pci_dma_dev_setup_dart(struct pci_dev *dev) /* We only have one iommu table on the mac for now, which makes * things simple. Setup all PCI devices to point to this table */ - dev->dev.archdata.dma_data = &iommu_table_dart; + set_iommu_table_base(&dev->dev, &iommu_table_dart); } static void pci_dma_bus_setup_dart(struct pci_bus *bus) From 8bbde7a7062facf8af35bcc9a64cbafe8f36f3cf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 16:52:35 +0000 Subject: [PATCH 09/17] powerpc: Move 64bit heap above 1TB on machines with 1TB segments If we are using 1TB segments and we are allowed to randomise the heap, we can put it above 1TB so it is backed by a 1TB segment. Otherwise the heap will be in the bottom 1TB which always uses 256MB segments and this may result in a performance penalty. This functionality is disabled when heap randomisation is turned off: echo 1 > /proc/sys/kernel/randomize_va_space which may be useful when trying to allocate the maximum amount of 16M or 16G pages. On a microbenchmark that repeatedly touches 32GB of memory with a stride of 256MB + 4kB (designed to stress 256MB segments while still mapping nicely into the L1 cache), we see the improvement: Force malloc to use heap all the time: # export MALLOC_MMAP_MAX_=0 MALLOC_TRIM_THRESHOLD_=-1 Disable heap randomization: # echo 1 > /proc/sys/kernel/randomize_va_space # time ./test 12.51s Enable heap randomization: # echo 2 > /proc/sys/kernel/randomize_va_space # time ./test 1.70s Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0a3216433051..1168c5f440ab 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1165,7 +1165,22 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long base = mm->brk; + unsigned long ret; + +#ifdef CONFIG_PPC64 + /* + * If we are using 1TB segments and we are allowed to randomise + * the heap, we can put it above 1TB so it is backed by a 1TB + * segment. Otherwise the heap will be in the bottom 1TB + * which always uses 256MB segments and this may result in a + * performance penalty. + */ + if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); +#endif + + ret = PAGE_ALIGN(base + brk_rnd()); if (ret < mm->brk) return mm->brk; From f2053f1a7bf6005b4e81826b1ac8d0b4117c4cf0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 16:57:40 +0000 Subject: [PATCH 10/17] powerpc/perf_counter: Fix vdso detection perf_counter uses arch_vma_name() to detect a vdso region which in turn uses current->mm->context.vdso_base. We need to initialise this before doing the mmap or else we fail to detect the vdso. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vdso.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 3faaf29bdb29..94e2df3cae07 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -240,6 +240,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto fail_mmapsem; } + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + /* * our vma flags don't have VM_WRITE so by default, the process isn't * allowed to write those pages. @@ -260,11 +267,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, vdso_pagelist); - if (rc) + if (rc) { + current->mm->context.vdso_base = 0; goto fail_mmapsem; - - /* Put vDSO base into mm struct */ - current->mm->context.vdso_base = vdso_base; + } up_write(&mm->mmap_sem); return 0; From ea55bf29126f0066a4e82a8545437494ff4fc431 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 19:56:43 +0000 Subject: [PATCH 11/17] powerpc: Increase NODES_SHIFT on 64bit from 4 to 8 Some System p configurations can already have more than 16 nodes so we need to increase NODES_SHIFT. I chose 256 to give us some room to grow in the future, although we can look at something smaller if the memory bloat is considered too much. Unless we clamp MAX_ACTIVE_REGIONS we end up with 300kB of extra bloat in early_node_map in mm/page_alloc.c: < 6144 early_node_map > 307200 early_node_map due to: #if MAX_NUMNODES >= 32 /* If there can be many nodes, allow up to 50 holes per node */ #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) #else /* By default, allow up to 256 distinct regions */ #define MAX_ACTIVE_REGIONS 256 Since our memory is mostly contiguous it seems reasonable to keep this at 256 for now. I also set 32bit to 32 to save space (is there any chance a 32bit system will have more than 32 discontiguous memory ranges?). Even with that fixed we have a few data structures that grow: < 896 bootmem_node_data > 14336 bootmem_node_data < 1280 node_devices > 20480 node_devices < 25088 kmalloc_caches > 59648 kmalloc_caches < 1632 hstates > 21792 hstates Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4fd479059d65..10a0a5488a44 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -385,9 +385,15 @@ config NUMA config NODES_SHIFT int + default "8" if PPC64 default "4" depends on NEED_MULTIPLE_NODES +config MAX_ACTIVE_REGIONS + int + default "256" if PPC64 + default "32" + config ARCH_SELECT_MEMORY_MODEL def_bool y depends on PPC64 From 049d0497060bc8db944f7b4984271327448b3603 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 20:47:39 +0000 Subject: [PATCH 12/17] powerpc: Fix ibm,client-architecture-support printout On machines without the ibm,client-architecture-support call we were missing a newline. We may as well print the full name in all its glory too - its ibm,client-architecture-support, not ibm,client-architecture as I mistakenly wrote (a name only an IBM architect could love). For my penance I will write out ibm,client-architecture-support 100 times. Before: Calling ibm,client-architecture...command line: root=/dev/sda6 console=hvc0 quiet After: Calling ibm,client-architecture-support... not implemented command line: root=/dev/sda6 console=hvc0 Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 864334b337a3..bafac2e41ae1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -800,7 +800,7 @@ static void __init prom_send_capabilities(void) root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* try calling the ibm,client-architecture-support method */ - prom_printf("Calling ibm,client-architecture..."); + prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, @@ -814,6 +814,7 @@ static void __init prom_send_capabilities(void) return; } call_prom("close", 1, 0, root); + prom_printf(" not implemented\n"); } /* no ibm,client-architecture-support call, try the old way */ From 142597dbbd8a1d516af3dacfa00037f21612e865 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 22 Sep 2009 05:18:09 +0000 Subject: [PATCH 13/17] powerpc: Cleanup linker script using new linker script macros. Signed-off-by: Tim Abbott Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@ozlabs.org Cc: Sam Ravnborg Acked-by: Sam Ravnborg Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 69 +++++++------------------------ 1 file changed, 15 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 58da4070723d..f56429362a12 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #include #include #include +#include ENTRY(_stext) @@ -71,12 +72,7 @@ SECTIONS /* Read-only data */ RODATA - /* Exception & bug tables */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(0) NOTES :kernel :notes @@ -93,12 +89,7 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); __init_begin = .; - - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } :kernel + INIT_TEXT_SECTION(PAGE_SIZE) :kernel /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -122,23 +113,16 @@ SECTIONS #endif } - . = ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; + INIT_SETUP(16) } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } + INIT_CALLS + } .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + CON_INITCALL } SECURITY_INIT @@ -169,14 +153,10 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; + INIT_RAM_FS } -#endif + PERCPU(PAGE_SIZE) . = ALIGN(8); @@ -240,36 +220,24 @@ SECTIONS #endif /* The initial task and kernel stack */ -#ifdef CONFIG_PPC32 - . = ALIGN(8192); -#else - . = ALIGN(16384); -#endif .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) + INIT_TASK_DATA(THREAD_SIZE) } - . = ALIGN(PAGE_SIZE); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) + PAGE_ALIGNED_DATA(PAGE_SIZE) } - . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) } - . = ALIGN(L1_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) + READ_MOSTLY_DATA(L1_CACHE_BYTES) } - . = ALIGN(PAGE_SIZE); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; + NOSAVE_DATA } . = ALIGN(PAGE_SIZE); @@ -280,14 +248,7 @@ SECTIONS * And finally the bss */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } + BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); _end = . ; From f32af63ed1327451cb91e3816fa043b6c2c52db1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 22 Sep 2009 18:12:26 +0000 Subject: [PATCH 14/17] powerpc/mm: Fix 40x and 8xx vs. _PAGE_SPECIAL The test to check whether we have _PAGE_SPECIAL defined is broken, since we always define it, just not always to a meaninful value :-) That broke 8xx and 40x under some circumstances. This fixes it by adding _PAGE_SPECIAL for both of these since they had a free PTE bit, and removing the condition around advertising it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pte-40x.h | 1 + arch/powerpc/include/asm/pte-8xx.h | 1 + arch/powerpc/include/asm/pte-common.h | 5 ----- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h index 6c3e1f4378d4..ec0b0b0d1df9 100644 --- a/arch/powerpc/include/asm/pte-40x.h +++ b/arch/powerpc/include/asm/pte-40x.h @@ -43,6 +43,7 @@ #define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ #define _PAGE_USER 0x010 /* matches one of the zone permission bits */ +#define _PAGE_SPECIAL 0x020 /* software: Special page */ #define _PAGE_RW 0x040 /* software: Writes permitted */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ #define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 94e979718dcf..dd5ea95fe61e 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -32,6 +32,7 @@ #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ +#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ /* These five software bits must be masked out when the entry is loaded * into the TLB. diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index c3b65076a263..f2b370180a09 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -25,9 +25,6 @@ #ifndef _PAGE_WRITETHRU #define _PAGE_WRITETHRU 0 #endif -#ifndef _PAGE_SPECIAL -#define _PAGE_SPECIAL 0 -#endif #ifndef _PAGE_4K_PFN #define _PAGE_4K_PFN 0 #endif @@ -179,7 +176,5 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); #define HAVE_PAGE_AGP /* Advertise support for _PAGE_SPECIAL */ -#ifdef _PAGE_SPECIAL #define __HAVE_ARCH_PTE_SPECIAL -#endif From daf8f40391b2a1978ea2071c20959d91fade6b1a Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 23 Sep 2009 03:51:04 +0000 Subject: [PATCH 15/17] powerpc/4xx: Fix erroneous xmon warning on PowerPC 4xx The xmon code relies on MSR_RI being non-zero to indicate that an exception is recoverable. If it is not, it prints a warning message. However, the PowerPC 4xx cores do not have an MSR_RI bit and this warning is produced for every xmon event. This introduces an unrecoverable_excp function to determine if an exception is recoverable or not. This gets rid of the erroneous warnings on 4xx. Signed-off-by: Josh Boyer Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/xmon/xmon.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0e09a45ac79a..c6f0a71b405e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -335,6 +335,16 @@ int cpus_are_in_xmon(void) } #endif +static inline int unrecoverable_excp(struct pt_regs *regs) +{ +#ifdef CONFIG_4xx + /* We have no MSR_RI bit on 4xx, so we simply return false */ + return 0; +#else + return ((regs->msr & MSR_RI) == 0); +#endif +} + static int xmon_core(struct pt_regs *regs, int fromipi) { int cmd = 0; @@ -388,7 +398,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) bp = NULL; if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) bp = at_breakpoint(regs->nip); - if (bp || (regs->msr & MSR_RI) == 0) + if (bp || unrecoverable_excp(regs)) fromipi = 0; if (!fromipi) { @@ -399,7 +409,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) cpu, BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if ((regs->msr & MSR_RI) == 0) + if (unrecoverable_excp(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); release_output_lock(); @@ -490,7 +500,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) printf("Stopped at breakpoint %x (", BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if ((regs->msr & MSR_RI) == 0) + if (unrecoverable_excp(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); remove_bpts(); From e0908085fc2391c85b85fb814ae1df377c8e0dcb Mon Sep 17 00:00:00 2001 From: Rex Feany Date: Wed, 23 Sep 2009 14:45:52 +0000 Subject: [PATCH 16/17] powerpc/8xx: Fix regression introduced by cache coherency rewrite After upgrading to the latest kernel on my mpc875 userspace started running incredibly slow (hours to get to a shell, even!). I tracked it down to commit 8d30c14cab30d405a05f2aaceda1e9ad57800f36, that patch removed a work-around for the 8xx. Adding it back makes my problem go away. Signed-off-by: Rex Feany Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/pgtable.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 83f1551ec2c9..53040931de32 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -30,6 +30,8 @@ #include #include +#include "mmu_decl.h" + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #ifdef CONFIG_SMP @@ -166,7 +168,7 @@ struct page * maybe_pte_to_page(pte_t pte) * support falls into the same category. */ -static pte_t set_pte_filter(pte_t pte) +static pte_t set_pte_filter(pte_t pte, unsigned long addr) { pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || @@ -175,6 +177,17 @@ static pte_t set_pte_filter(pte_t pte) if (!pg) return pte; if (!test_bit(PG_arch_1, &pg->flags)) { +#ifdef CONFIG_8xx + /* On 8xx, cache control instructions (particularly + * "dcbst" from flush_dcache_icache) fault as write + * operation if there is an unpopulated TLB entry + * for the address in question. To workaround that, + * we invalidate the TLB here, thus avoiding dcbst + * misbehaviour. + */ + /* 8xx doesn't care about PID, size or ind args */ + _tlbil_va(addr, 0, 0, 0); +#endif /* CONFIG_8xx */ flush_dcache_icache_page(pg); set_bit(PG_arch_1, &pg->flags); } @@ -194,7 +207,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static pte_t set_pte_filter(pte_t pte) +static pte_t set_pte_filter(pte_t pte, unsigned long addr) { struct page *pg; @@ -276,7 +289,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, * this context might not have been activated yet when this * is called. */ - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); /* Perform the setting of the PTE */ __set_pte_at(mm, addr, ptep, pte, 0); From 09dd3fc19c09f79115267361ecd7d5c5d2c27a3a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 24 Sep 2009 16:05:52 +1000 Subject: [PATCH 17/17] Fix build of cpm_uart due to core changes Commit ebd2c8f6d2ec4012c267ecb95e72a57b8355a705 "serial: kill off uart_info" broke the build of this driver, this fixes it. Signed-off-by: Benjamin Herrenschmidt --- drivers/serial/cpm_uart/cpm_uart_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c index 8d349b23249a..300cea768d74 100644 --- a/drivers/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/serial/cpm_uart/cpm_uart_core.c @@ -649,7 +649,7 @@ static int cpm_uart_tx_pump(struct uart_port *port) u8 *p; int count; struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port; - struct circ_buf *xmit = &port->info->xmit; + struct circ_buf *xmit = &port->state->xmit; /* Handle xon/xoff */ if (port->x_char) {