kernel-fxtec-pro1x/arch/arm/mach-omap2/omap4-common.c

/*
 * OMAP4 specific common source file.
 *
 * Copyright (C) 2010 Texas Instruments, Inc.
 * Author:
 *	Santosh Shilimkar <santosh.shilimkar@ti.com>
 *
 *
 * This program is free software,you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/platform_device.h>

#include <asm/hardware/gic.h>
#include <asm/hardware/cache-l2x0.h>

#include <mach/hardware.h>
#include <mach/omap4-common.h>

#ifdef CONFIG_CACHE_L2X0
void __iomem *l2cache_base;
#endif

void __iomem *gic_cpu_base_addr;
void __iomem *gic_dist_base_addr;


void __init gic_init_irq(void)
{
	/* Static mapping, never released */
	gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K);
	BUG_ON(!gic_dist_base_addr);
	gic_dist_init(0, gic_dist_base_addr, 29);

	/* Static mapping, never released */
	gic_cpu_base_addr = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512);
	BUG_ON(!gic_cpu_base_addr);
	gic_cpu_init(0, gic_cpu_base_addr);
}

#ifdef CONFIG_CACHE_L2X0

static void omap4_l2x0_disable(void)
{
	/* Disable PL310 L2 Cache controller */
	omap_smc1(0x102, 0x0);
}

static int __init omap_l2_cache_init(void)
{
	u32 aux_ctrl = 0;

	/*
	 * To avoid code running on other OMAPs in
	 * multi-omap builds
	 */
	if (!cpu_is_omap44xx())
		return -ENODEV;

	/* Static mapping, never released */
	l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);
	BUG_ON(!l2cache_base);

	/*
	 * 16-way associativity, parity disabled
	 * Way size - 32KB (es1.0)
	 * Way size - 64KB (es2.0 +)
	 */
	aux_ctrl = ((1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT) |
			(0x1 << 25) |
			(0x1 << L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT) |
			(0x1 << L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT));

	if (omap_rev() == OMAP4430_REV_ES1_0) {
		aux_ctrl |= 0x2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT;
	} else {
		aux_ctrl |= ((0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) |
			(1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) |
			(1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) |
			(1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) |
			(1 << L2X0_AUX_CTRL_EARLY_BRESP_SHIFT));
	}
	if (omap_rev() != OMAP4430_REV_ES1_0)
		omap_smc1(0x109, aux_ctrl);

	/* Enable PL310 L2 Cache controller */
	omap_smc1(0x102, 0x1);

	l2x0_init(l2cache_base, aux_ctrl, L2X0_AUX_CTRL_MASK);

	/*
	 * Override default outer_cache.disable with a OMAP4
	 * specific one
	*/
	outer_cache.disable = omap4_l2x0_disable;

	return 0;
}
early_initcall(omap_l2_cache_init);
#endif
omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00			`/*`
			`* OMAP4 specific common source file.`
			`*`
			`* Copyright (C) 2010 Texas Instruments, Inc.`
			`* Author:`
			`* Santosh Shilimkar <santosh.shilimkar@ti.com>`
			`*`
			`*`
			`* This program is free software,you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License version 2 as`
			`* published by the Free Software Foundation.`
			`*/`

			`#include <linux/kernel.h>`
			`#include <linux/init.h>`
			`#include <linux/io.h>`
			`#include <linux/platform_device.h>`

			`#include <asm/hardware/gic.h>`
			`#include <asm/hardware/cache-l2x0.h>`

			`#include <mach/hardware.h>`
			`#include <mach/omap4-common.h>`

			`#ifdef CONFIG_CACHE_L2X0`
			`void __iomem *l2cache_base;`
			`#endif`

			`void __iomem *gic_cpu_base_addr;`
			`void __iomem *gic_dist_base_addr;`


			`void __init gic_init_irq(void)`
			`{`
			`/* Static mapping, never released */`
			`gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K);`
			`BUG_ON(!gic_dist_base_addr);`
			`gic_dist_init(0, gic_dist_base_addr, 29);`

			`/* Static mapping, never released */`
			`gic_cpu_base_addr = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512);`
			`BUG_ON(!gic_cpu_base_addr);`
			`gic_cpu_init(0, gic_cpu_base_addr);`
			`}`

			`#ifdef CONFIG_CACHE_L2X0`
omap4: l2x0: Override the default l2x0_disable The machine_kexec() calls outer_disable which can crash on OMAP4 becasue of trustzone restrictions. This patch overrides the default l2x0_disable with a OMAP4 specific implementation taking care of trustzone Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Tony Lindgren <tony@atomide.com> Acked-by: Linus Walleij <linus.walleij@stericsson.com> 2010-07-31 10:10:10 -06:00
			`static void omap4_l2x0_disable(void)`
			`{`
			`/* Disable PL310 L2 Cache controller */`
			`omap_smc1(0x102, 0x0);`
			`}`

omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00			`static int __init omap_l2_cache_init(void)`
			`{`
omap4: l2x0: Construct the AUXCTRL value using defines This patch removes the hardcoded value of auxctrl value and construct it using bitfields Bit 25 is reserved and is always set to 1. Same value of this bit is retained in this patch Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:03 -07:00			`u32 aux_ctrl = 0;`

omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00			`/*`
			`* To avoid code running on other OMAPs in`
			`* multi-omap builds`
			`*/`
			`if (!cpu_is_omap44xx())`
			`return -ENODEV;`

			`/* Static mapping, never released */`
			`l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);`
			`BUG_ON(!l2cache_base);`

			`/*`
omap4: l2x0: Fix init parameter for es2.0 On ES2.0 the L2 cache init parameter ineeds to be changed to take care of cache size. The cache size is 1MB on ES2.0 vs 512KB on ES1.0 This patch fixes the init parameter to update the same using dynamic cpu version check Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> 2010-09-16 07:14:47 -06:00			`* 16-way associativity, parity disabled`
			`* Way size - 32KB (es1.0)`
			`* Way size - 64KB (es2.0 +)`
omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00			`*/`
omap4: l2x0: Construct the AUXCTRL value using defines This patch removes the hardcoded value of auxctrl value and construct it using bitfields Bit 25 is reserved and is always set to 1. Same value of this bit is retained in this patch Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:03 -07:00			`aux_ctrl = ((1 << L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT) \|`
			`(0x1 << 25) \|`
			`(0x1 << L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT) \|`
			`(0x1 << L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT));`

omap4: l2x0: enable instruction and data prefetching Enabling L2 prefetching improves performance as shown on Panda ES2.1 board with mem test, and it has measurable impact on performances. I think we should consider it, even though it damages "writes" a bit. (rebased to k.org) Usually the prefetch is used at both levels together L1 + L2, however, to enable the CP15 prefetch engines, these are under security, and on GP devices, we cannot enable it(e.g. on PandaBoard). However, just enabling PL310 prefetch seems to provide performance improvement, as shown in the data below (from Ubuntu) and would be a great thing to pull in. What prefetch does is enable automatic next line prefetching. With this enabled, whenever the PL310 receives a cachable read request, it automatically prefetches the following cache line as well. Measurement Data: == STOCK 10.10 WITHOUT PATCH ======================== ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2aaad000 0x2b2ad000 copy libc 133 MB/s copy Android v5 273 MB/s copy Android NEON 235 MB/s copy INT32 116 MB/s copy ASM ARM 187 MB/s copy ASM VLDM 64 204 MB/s copy ASM VLDM 128 173 MB/s copy ASM VLD1 216 MB/s read ASM ARM 286 MB/s read ASM VLDM 242 MB/s read ASM VLD1 286 MB/s write libc 1947 MB/s write ASM ARM 1943 MB/s write ASM VSTM 1942 MB/s write ASM VST1 1935 MB/s 10.10 + PATCH ============= ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2ab17000 0x2b317000 copy libc 129 MB/s copy Android v5 256 MB/s copy Android NEON 356 MB/s copy INT32 127 MB/s copy ASM ARM 321 MB/s copy ASM VLDM 64 337 MB/s copy ASM VLDM 128 321 MB/s copy ASM VLD1 350 MB/s read ASM ARM 496 MB/s read ASM VLDM 470 MB/s read ASM VLD1 488 MB/s write libc 1701 MB/s write ASM ARM 1682 MB/s write ASM VSTM 1693 MB/s write ASM VST1 1681 MB/s Signed-off-by: Mans Rullgard <mans@mansr.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:04 -07:00			`if (omap_rev() == OMAP4430_REV_ES1_0) {`
omap4: l2x0: Construct the AUXCTRL value using defines This patch removes the hardcoded value of auxctrl value and construct it using bitfields Bit 25 is reserved and is always set to 1. Same value of this bit is retained in this patch Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:03 -07:00			`aux_ctrl \|= 0x2 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT;`
omap4: l2x0: enable instruction and data prefetching Enabling L2 prefetching improves performance as shown on Panda ES2.1 board with mem test, and it has measurable impact on performances. I think we should consider it, even though it damages "writes" a bit. (rebased to k.org) Usually the prefetch is used at both levels together L1 + L2, however, to enable the CP15 prefetch engines, these are under security, and on GP devices, we cannot enable it(e.g. on PandaBoard). However, just enabling PL310 prefetch seems to provide performance improvement, as shown in the data below (from Ubuntu) and would be a great thing to pull in. What prefetch does is enable automatic next line prefetching. With this enabled, whenever the PL310 receives a cachable read request, it automatically prefetches the following cache line as well. Measurement Data: == STOCK 10.10 WITHOUT PATCH ======================== ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2aaad000 0x2b2ad000 copy libc 133 MB/s copy Android v5 273 MB/s copy Android NEON 235 MB/s copy INT32 116 MB/s copy ASM ARM 187 MB/s copy ASM VLDM 64 204 MB/s copy ASM VLDM 128 173 MB/s copy ASM VLD1 216 MB/s read ASM ARM 286 MB/s read ASM VLDM 242 MB/s read ASM VLD1 286 MB/s write libc 1947 MB/s write ASM ARM 1943 MB/s write ASM VSTM 1942 MB/s write ASM VST1 1935 MB/s 10.10 + PATCH ============= ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2ab17000 0x2b317000 copy libc 129 MB/s copy Android v5 256 MB/s copy Android NEON 356 MB/s copy INT32 127 MB/s copy ASM ARM 321 MB/s copy ASM VLDM 64 337 MB/s copy ASM VLDM 128 321 MB/s copy ASM VLD1 350 MB/s read ASM ARM 496 MB/s read ASM VLDM 470 MB/s read ASM VLD1 488 MB/s write libc 1701 MB/s write ASM ARM 1682 MB/s write ASM VSTM 1693 MB/s write ASM VST1 1681 MB/s Signed-off-by: Mans Rullgard <mans@mansr.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:04 -07:00			`} else {`
			`aux_ctrl \|= ((0x3 << L2X0_AUX_CTRL_WAY_SIZE_SHIFT) \|`
omap4: l2x0: Set share override bit Clearing bit 22 in the PL310 Auxiliary Control register (shared attribute override enable) has the side effect of transforming Normal Shared Non-cacheable reads into Cacheable no-allocate reads. Coherent DMA buffers in Linux always have a Cacheable alias via the kernel linear mapping and the processor can speculatively load cache lines into the PL310 controller. With bit 22 cleared, Non-cacheable reads would unexpectedly hit such cache lines leading to buffer corruption Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:05 -07:00			`(1 << L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT) \|`
omap4: l2x0: enable instruction and data prefetching Enabling L2 prefetching improves performance as shown on Panda ES2.1 board with mem test, and it has measurable impact on performances. I think we should consider it, even though it damages "writes" a bit. (rebased to k.org) Usually the prefetch is used at both levels together L1 + L2, however, to enable the CP15 prefetch engines, these are under security, and on GP devices, we cannot enable it(e.g. on PandaBoard). However, just enabling PL310 prefetch seems to provide performance improvement, as shown in the data below (from Ubuntu) and would be a great thing to pull in. What prefetch does is enable automatic next line prefetching. With this enabled, whenever the PL310 receives a cachable read request, it automatically prefetches the following cache line as well. Measurement Data: == STOCK 10.10 WITHOUT PATCH ======================== ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2aaad000 0x2b2ad000 copy libc 133 MB/s copy Android v5 273 MB/s copy Android NEON 235 MB/s copy INT32 116 MB/s copy ASM ARM 187 MB/s copy ASM VLDM 64 204 MB/s copy ASM VLDM 128 173 MB/s copy ASM VLD1 216 MB/s read ASM ARM 286 MB/s read ASM VLDM 242 MB/s read ASM VLD1 286 MB/s write libc 1947 MB/s write ASM ARM 1943 MB/s write ASM VSTM 1942 MB/s write ASM VST1 1935 MB/s 10.10 + PATCH ============= ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2ab17000 0x2b317000 copy libc 129 MB/s copy Android v5 256 MB/s copy Android NEON 356 MB/s copy INT32 127 MB/s copy ASM ARM 321 MB/s copy ASM VLDM 64 337 MB/s copy ASM VLDM 128 321 MB/s copy ASM VLD1 350 MB/s read ASM ARM 496 MB/s read ASM VLDM 470 MB/s read ASM VLD1 488 MB/s write libc 1701 MB/s write ASM ARM 1682 MB/s write ASM VSTM 1693 MB/s write ASM VST1 1681 MB/s Signed-off-by: Mans Rullgard <mans@mansr.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:04 -07:00			`(1 << L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT) \|`
omap4: l2x0: Enable early BRESP bit The AXI protocol specifies that the write response can only be sent back to an AXI master when the last write data has been accepted. This optimization enables the PL310 to send the write response of certain write transactions as soon as the store buffer accepts the write address. This behavior is not compatible with the AXI protocol and is disabled by default. You enable this optimization by setting the Early BRESP Enable bit in the Auxiliary Control Register (bit [30]). Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Mans Rullgard <mans@mansr.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:06 -07:00			`(1 << L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT) \|`
			`(1 << L2X0_AUX_CTRL_EARLY_BRESP_SHIFT));`
omap4: l2x0: enable instruction and data prefetching Enabling L2 prefetching improves performance as shown on Panda ES2.1 board with mem test, and it has measurable impact on performances. I think we should consider it, even though it damages "writes" a bit. (rebased to k.org) Usually the prefetch is used at both levels together L1 + L2, however, to enable the CP15 prefetch engines, these are under security, and on GP devices, we cannot enable it(e.g. on PandaBoard). However, just enabling PL310 prefetch seems to provide performance improvement, as shown in the data below (from Ubuntu) and would be a great thing to pull in. What prefetch does is enable automatic next line prefetching. With this enabled, whenever the PL310 receives a cachable read request, it automatically prefetches the following cache line as well. Measurement Data: == STOCK 10.10 WITHOUT PATCH ======================== ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2aaad000 0x2b2ad000 copy libc 133 MB/s copy Android v5 273 MB/s copy Android NEON 235 MB/s copy INT32 116 MB/s copy ASM ARM 187 MB/s copy ASM VLDM 64 204 MB/s copy ASM VLDM 128 173 MB/s copy ASM VLD1 216 MB/s read ASM ARM 286 MB/s read ASM VLDM 242 MB/s read ASM VLD1 286 MB/s write libc 1947 MB/s write ASM ARM 1943 MB/s write ASM VSTM 1942 MB/s write ASM VST1 1935 MB/s 10.10 + PATCH ============= ~# ./memspeed size 8388608 8192k 8M offset 8388608, 0 buffers 0x2ab17000 0x2b317000 copy libc 129 MB/s copy Android v5 256 MB/s copy Android NEON 356 MB/s copy INT32 127 MB/s copy ASM ARM 321 MB/s copy ASM VLDM 64 337 MB/s copy ASM VLDM 128 321 MB/s copy ASM VLD1 350 MB/s read ASM ARM 496 MB/s read ASM VLDM 470 MB/s read ASM VLD1 488 MB/s write libc 1701 MB/s write ASM ARM 1682 MB/s write ASM VSTM 1693 MB/s write ASM VST1 1681 MB/s Signed-off-by: Mans Rullgard <mans@mansr.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:04 -07:00			`}`
			`if (omap_rev() != OMAP4430_REV_ES1_0)`
			`omap_smc1(0x109, aux_ctrl);`

			`/* Enable PL310 L2 Cache controller */`
			`omap_smc1(0x102, 0x1);`
omap4: l2x0: Construct the AUXCTRL value using defines This patch removes the hardcoded value of auxctrl value and construct it using bitfields Bit 25 is reserved and is always set to 1. Same value of this bit is retained in this patch Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Tested-by: Nishanth Menon <nm@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-11-19 10:31:03 -07:00
			`l2x0_init(l2cache_base, aux_ctrl, L2X0_AUX_CTRL_MASK);`
omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00
omap4: l2x0: Override the default l2x0_disable The machine_kexec() calls outer_disable which can crash on OMAP4 becasue of trustzone restrictions. This patch overrides the default l2x0_disable with a OMAP4 specific implementation taking care of trustzone Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Tony Lindgren <tony@atomide.com> Acked-by: Linus Walleij <linus.walleij@stericsson.com> 2010-07-31 10:10:10 -06:00			`/*`
			`* Override default outer_cache.disable with a OMAP4`
			`* specific one`
			`*/`
			`outer_cache.disable = omap4_l2x0_disable;`

omap4: Move SOC specific code from board file This patch moves OMAP4 soc specific code from 4430sdp board file. The change is necessary so that newer board support can be added with minimal changes. This will be also problematic for multi-board, multi-omap builds. Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> 2010-05-14 13:05:26 -06:00			`return 0;`
			`}`
			`early_initcall(omap_l2_cache_init);`
			`#endif`