diff --git a/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt b/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt
index 61df564c0d23..d74091a8a3bf 100644
--- a/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt
+++ b/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt
@@ -4,6 +4,8 @@ Marvell Armada 370 and Armada XP Interrupt Controller
 Required properties:
 - compatible: Should be "marvell,mpic"
 - interrupt-controller: Identifies the node as an interrupt controller.
+- msi-controller: Identifies the node as an PCI Message Signaled
+  Interrupt controller.
 - #interrupt-cells: The number of cells to define the interrupts. Should be 1.
   The cell is the IRQ number
 
@@ -24,6 +26,7 @@ Example:
               #address-cells = <1>;
               #size-cells = <1>;
               interrupt-controller;
+              msi-controller;
               reg = <0xd0020a00 0x1d0>,
                     <0xd0021070 0x58>;
         };
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
index 92d36e2aa877..f28d82bbbc56 100644
--- a/Documentation/devicetree/bindings/arm/cci.txt
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -36,14 +36,18 @@ specific to ARM.
 
 	- reg
 		Usage: required
-		Value type: <prop-encoded-array>
+		Value type: Integer cells. A register entry, expressed as a pair
+			    of cells, containing base and size.
 		Definition: A standard property. Specifies base physical
 			    address of CCI control registers common to all
 			    interfaces.
 
 	- ranges:
 		Usage: required
-		Value type: <prop-encoded-array>
+		Value type: Integer cells. An array of range entries, expressed
+			    as a tuple of cells, containing child address,
+			    parent address and the size of the region in the
+			    child address space.
 		Definition: A standard property. Follow rules in the ePAPR for
 			    hierarchical bus addressing. CCI interfaces
 			    addresses refer to the parent node addressing
@@ -74,11 +78,49 @@ specific to ARM.
 
 		- reg:
 			Usage: required
-			Value type: <prop-encoded-array>
+			Value type: Integer cells. A register entry, expressed
+				    as a pair of cells, containing base and
+				    size.
 			Definition: the base address and size of the
 				    corresponding interface programming
 				    registers.
 
+	- CCI PMU node
+
+		Parent node must be CCI interconnect node.
+
+		A CCI pmu node must contain the following properties:
+
+		- compatible
+			Usage: required
+			Value type: <string>
+			Definition: must be "arm,cci-400-pmu"
+
+		- reg:
+			Usage: required
+			Value type: Integer cells. A register entry, expressed
+				    as a pair of cells, containing base and
+				    size.
+			Definition: the base address and size of the
+				    corresponding interface programming
+				    registers.
+
+		- interrupts:
+			Usage: required
+			Value type: Integer cells. Array of interrupt specifier
+				    entries, as defined in
+				    ../interrupt-controller/interrupts.txt.
+			Definition: list of counter overflow interrupts, one per
+				    counter. The interrupts must be specified
+				    starting with the cycle counter overflow
+				    interrupt, followed by counter0 overflow
+				    interrupt, counter1 overflow interrupt,...
+				    ,counterN overflow interrupt.
+
+				    The CCI PMU has an interrupt signal for each
+				    counter. The number of interrupts must be
+				    equal to the number of counters.
+
 * CCI interconnect bus masters
 
 	Description: masters in the device tree connected to a CCI port
@@ -144,7 +186,7 @@ Example:
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x0 0x2c090000 0 0x1000>;
-		ranges = <0x0 0x0 0x2c090000 0x6000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
 
 		cci_control0: slave-if@1000 {
 			compatible = "arm,cci-400-ctrl-if";
@@ -163,6 +205,16 @@ Example:
 			interface-type = "ace";
 			reg = <0x5000 0x1000>;
 		};
+
+		pmu@9000 {
+			 compatible = "arm,cci-400-pmu";
+			 reg = <0x9000 0x5000>;
+			 interrupts = <0 101 4>,
+				      <0 102 4>,
+				      <0 103 4>,
+				      <0 104 4>,
+				      <0 105 4>;
+		};
 	};
 
 This CCI node corresponds to a CCI component whose control registers sits
diff --git a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
index cffc93d97f54..fc2910fa7e45 100644
--- a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
+++ b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
@@ -1,10 +1,10 @@
-* Gated Clock bindings for Marvell Orion SoCs
+* Gated Clock bindings for Marvell EBU SoCs
 
-Marvell Dove and Kirkwood allow some peripheral clocks to be gated to save
-some power. The clock consumer should specify the desired clock by having
-the clock ID in its "clocks" phandle cell. The clock ID is directly mapped to
-the corresponding clock gating control bit in HW to ease manual clock lookup
-in datasheet.
+Marvell Armada 370/XP, Dove and Kirkwood allow some peripheral clocks to be
+gated to save some power. The clock consumer should specify the desired clock
+by having the clock ID in its "clocks" phandle cell. The clock ID is directly
+mapped to the corresponding clock gating control bit in HW to ease manual clock
+lookup in datasheet.
 
 The following is a list of provided IDs for Armada 370:
 ID	Clock	Peripheral
@@ -94,6 +94,8 @@ ID	Clock	Peripheral
 
 Required properties:
 - compatible : shall be one of the following:
+	"marvell,armada-370-gating-clock" - for Armada 370 SoC clock gating
+	"marvell,armada-xp-gating-clock" - for Armada XP SoC clock gating
 	"marvell,dove-gating-clock" - for Dove SoC clock gating
 	"marvell,kirkwood-gating-clock" - for Kirkwood SoC clock gating
 - reg : shall be the register address of the Clock Gating Control register
diff --git a/Documentation/devicetree/bindings/pci/mvebu-pci.txt b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
index 9556e2fedf6d..08c716b2c6b6 100644
--- a/Documentation/devicetree/bindings/pci/mvebu-pci.txt
+++ b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
@@ -5,6 +5,7 @@ Mandatory properties:
 - compatible: one of the following values:
     marvell,armada-370-pcie
     marvell,armada-xp-pcie
+    marvell,dove-pcie
     marvell,kirkwood-pcie
 - #address-cells, set to <3>
 - #size-cells, set to <2>
@@ -14,6 +15,8 @@ Mandatory properties:
 - ranges: ranges describing the MMIO registers to control the PCIe
   interfaces, and ranges describing the MBus windows needed to access
   the memory and I/O regions of each PCIe interface.
+- msi-parent: Link to the hardware entity that serves as the Message
+  Signaled Interrupt controller for this PCI controller.
 
 The ranges describing the MMIO registers have the following layout:
 
@@ -74,6 +77,8 @@ and the following optional properties:
 - marvell,pcie-lane: the physical PCIe lane number, for ports having
   multiple lanes. If this property is not found, we assume that the
   value is 0.
+- reset-gpios: optional gpio to PERST#
+- reset-delay-us: delay in us to wait after reset de-assertion
 
 Example:
 
@@ -86,6 +91,7 @@ pcie-controller {
 	#size-cells = <2>;
 
 	bus-range = <0x00 0xff>;
+	msi-parent = <&mpic>;
 
 	ranges =
 	       <0x82000000 0 0x40000 MBUS_ID(0xf0, 0x01) 0x40000 0 0x00002000	/* Port 0.0 registers */
@@ -135,6 +141,10 @@ pcie-controller {
 		interrupt-map = <0 0 0 0 &mpic 58>;
 		marvell,pcie-port = <0>;
 		marvell,pcie-lane = <0>;
+		/* low-active PERST# reset on GPIO 25 */
+		reset-gpios = <&gpio0 25 1>;
+		/* wait 20ms for device settle after reset deassertion */
+		reset-delay-us = <20000>;
 		clocks = <&gateclk 5>;
 		status = "disabled";
 	};
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9bd3a85d880f..04163fece49f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -826,7 +826,6 @@ config ARCH_DAVINCI
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_CHIP
 	select HAVE_IDE
-	select NEED_MACH_GPIO_H
 	select TI_PRIV_EDMA
 	select USE_OF
 	select ZONE_DMA
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 30a44e2df47e..40f15f133c55 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -22,17 +22,19 @@
 #include <linux/mtd/partitions.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/mtd-davinci-aemif.h>
+#include <linux/platform_data/spi-davinci.h>
+#include <linux/platform_data/usb-davinci.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
+#include <mach/common.h>
 #include <mach/cp_intc.h>
 #include <mach/mux.h>
-#include <linux/platform_data/mtd-davinci.h>
 #include <mach/da8xx.h>
-#include <linux/platform_data/usb-davinci.h>
-#include <linux/platform_data/mtd-davinci-aemif.h>
-#include <linux/platform_data/spi-davinci.h>
 
 #define DA830_EVM_PHY_ID		""
 /*
@@ -591,6 +593,10 @@ static __init void da830_evm_init(void)
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
 	int ret;
 
+	ret = da830_register_gpio();
+	if (ret)
+		pr_warn("da830_evm_init: GPIO init failed: %d\n", ret);
+
 	ret = da830_register_edma(da830_edma_rsv);
 	if (ret)
 		pr_warning("da830_evm_init: edma registration failed: %d\n",
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index dd1fb24521aa..df16cb88a26b 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -28,6 +28,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/gpio-davinci.h>
 #include <linux/platform_data/mtd-davinci.h>
 #include <linux/platform_data/mtd-davinci-aemif.h>
 #include <linux/platform_data/spi-davinci.h>
@@ -38,6 +39,7 @@
 #include <linux/spi/flash.h>
 #include <linux/wl12xx.h>
 
+#include <mach/common.h>
 #include <mach/cp_intc.h>
 #include <mach/da8xx.h>
 #include <mach/mux.h>
@@ -1437,6 +1439,10 @@ static __init void da850_evm_init(void)
 {
 	int ret;
 
+	ret = da850_register_gpio();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	ret = pmic_tps65070_init();
 	if (ret)
 		pr_warn("%s: TPS65070 PMIC init failed: %d\n", __func__, ret);
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 42b23a3194a0..ecdc7d44fa70 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -22,15 +22,17 @@
 #include <media/tvp514x.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/eeprom.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/i2c-davinci.h>
+#include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/mmc-davinci.h>
+#include <linux/platform_data/usb-davinci.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
-#include <linux/platform_data/i2c-davinci.h>
 #include <mach/serial.h>
-#include <linux/platform_data/mtd-davinci.h>
-#include <linux/platform_data/mmc-davinci.h>
-#include <linux/platform_data/usb-davinci.h>
+#include <mach/common.h>
 
 #include "davinci.h"
 
@@ -375,6 +377,11 @@ static struct spi_board_info dm355_evm_spi_info[] __initconst = {
 static __init void dm355_evm_init(void)
 {
 	struct clk *aemif;
+	int ret;
+
+	ret = dm355_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
 
 	gpio_request(1, "dm9000");
 	gpio_direction_input(1);
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index 65a984c52df6..43bacbf15314 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -19,15 +19,16 @@
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/eeprom.h>
+#include <linux/platform_data/i2c-davinci.h>
+#include <linux/platform_data/mmc-davinci.h>
+#include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/usb-davinci.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
-#include <linux/platform_data/i2c-davinci.h>
+#include <mach/common.h>
 #include <mach/serial.h>
-#include <linux/platform_data/mtd-davinci.h>
-#include <linux/platform_data/mmc-davinci.h>
-#include <linux/platform_data/usb-davinci.h>
 
 #include "davinci.h"
 
@@ -234,6 +235,11 @@ static struct spi_board_info dm355_leopard_spi_info[] __initconst = {
 static __init void dm355_leopard_init(void)
 {
 	struct clk *aemif;
+	int ret;
+
+	ret = dm355_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
 
 	gpio_request(9, "dm9000");
 	gpio_direction_input(9);
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 4078ba93776b..f4a6c18912ea 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -743,6 +743,12 @@ static struct spi_board_info dm365_evm_spi_info[] __initconst = {
 
 static __init void dm365_evm_init(void)
 {
+	int ret;
+
+	ret = dm365_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	evm_init_i2c();
 	davinci_serial_init(dm365_serial_device);
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 40bb9b5b87e8..9cc32c283b8b 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -754,9 +754,14 @@ static int davinci_phy_fixup(struct phy_device *phydev)
 
 static __init void davinci_evm_init(void)
 {
+	int ret;
 	struct clk *aemif_clk;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
+	ret = dm644x_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	aemif_clk = clk_get(NULL, "aemif");
 	clk_prepare_enable(aemif_clk);
 
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 2bc3651d56cc..44b20191a9fe 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -33,17 +33,19 @@
 #include <linux/mtd/partitions.h>
 #include <linux/clk.h>
 #include <linux/export.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/i2c-davinci.h>
+#include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/mtd-davinci-aemif.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
 #include <mach/common.h>
+#include <mach/irqs.h>
 #include <mach/serial.h>
-#include <linux/platform_data/i2c-davinci.h>
-#include <linux/platform_data/mtd-davinci.h>
 #include <mach/clock.h>
 #include <mach/cdce949.h>
-#include <linux/platform_data/mtd-davinci-aemif.h>
 
 #include "davinci.h"
 #include "clock.h"
@@ -786,8 +788,13 @@ static struct edma_rsv_info dm646x_edma_rsv[] = {
 
 static __init void evm_init(void)
 {
+	int ret;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
+	ret = dm646x_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	evm_init_i2c();
 	davinci_serial_init(dm646x_serial_device);
 	dm646x_init_mcasp0(&dm646x_evm_snd_data[0]);
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 46f336fca803..bb680af98374 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -26,17 +26,18 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/mtd/partitions.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/i2c-davinci.h>
+#include <linux/platform_data/mmc-davinci.h>
+#include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/usb-davinci.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
 #include <mach/common.h>
-#include <linux/platform_data/i2c-davinci.h>
 #include <mach/serial.h>
 #include <mach/mux.h>
-#include <linux/platform_data/mtd-davinci.h>
-#include <linux/platform_data/mmc-davinci.h>
-#include <linux/platform_data/usb-davinci.h>
 
 #include "davinci.h"
 
@@ -169,9 +170,14 @@ static struct davinci_mmc_config davinci_ntosd2_mmc_config = {
 
 static __init void davinci_ntosd2_init(void)
 {
+	int ret;
 	struct clk *aemif_clk;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
+	ret = dm644x_gpio_register();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	aemif_clk = clk_get(NULL, "aemif");
 	clk_prepare_enable(aemif_clk);
 
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index e0de2da41b5c..2aac51d0e853 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -13,10 +13,12 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
+#include <mach/common.h>
 #include <mach/cp_intc.h>
 #include <mach/da8xx.h>
 #include <mach/mux.h>
@@ -290,6 +292,10 @@ static __init void omapl138_hawk_init(void)
 {
 	int ret;
 
+	ret = da850_register_gpio();
+	if (ret)
+		pr_warn("%s: GPIO init failed: %d\n", __func__, ret);
+
 	davinci_serial_init(da8xx_serial_device);
 
 	omapl138_hawk_config_emac();
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index d6c746e35ad9..0813b5167e05 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -11,6 +11,7 @@
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -20,7 +21,6 @@
 #include <mach/common.h>
 #include <mach/time.h>
 #include <mach/da8xx.h>
-#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
@@ -1151,6 +1151,16 @@ static struct davinci_id da830_ids[] = {
 	},
 };
 
+static struct davinci_gpio_platform_data da830_gpio_platform_data = {
+	.ngpio = 128,
+	.intc_irq_num = DA830_N_CP_INTC_IRQ,
+};
+
+int __init da830_register_gpio(void)
+{
+	return da8xx_register_gpio(&da830_gpio_platform_data);
+}
+
 static struct davinci_timer_instance da830_timer_instance[2] = {
 	{
 		.base		= DA8XX_TIMER64P0_BASE,
@@ -1196,10 +1206,6 @@ static struct davinci_soc_info davinci_soc_info_da830 = {
 	.intc_irq_prios		= da830_default_priorities,
 	.intc_irq_num		= DA830_N_CP_INTC_IRQ,
 	.timer_info		= &da830_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DA8XX_GPIO_BASE,
-	.gpio_num		= 128,
-	.gpio_irq		= IRQ_DA8XX_GPIO0,
 	.emac_pdata		= &da8xx_emac_pdata,
 };
 
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index f56e5fbfa2fd..352984e1528a 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -17,6 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/cpufreq.h>
 #include <linux/regulator/consumer.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -28,7 +29,6 @@
 #include <mach/da8xx.h>
 #include <mach/cpufreq.h>
 #include <mach/pm.h>
-#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
@@ -1281,6 +1281,16 @@ int __init da850_register_vpif_capture(struct vpif_capture_config
 	return platform_device_register(&da850_vpif_capture_dev);
 }
 
+static struct davinci_gpio_platform_data da850_gpio_platform_data = {
+	.ngpio = 144,
+	.intc_irq_num = DA850_N_CP_INTC_IRQ,
+};
+
+int __init da850_register_gpio(void)
+{
+	return da8xx_register_gpio(&da850_gpio_platform_data);
+}
+
 static struct davinci_soc_info davinci_soc_info_da850 = {
 	.io_desc		= da850_io_desc,
 	.io_desc_num		= ARRAY_SIZE(da850_io_desc),
@@ -1298,10 +1308,6 @@ static struct davinci_soc_info davinci_soc_info_da850 = {
 	.intc_irq_prios		= da850_default_priorities,
 	.intc_irq_num		= DA850_N_CP_INTC_IRQ,
 	.timer_info		= &da850_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DA8XX_GPIO_BASE,
-	.gpio_num		= 144,
-	.gpio_irq		= IRQ_DA8XX_GPIO0,
 	.emac_pdata		= &da8xx_emac_pdata,
 	.sram_dma		= DA8XX_SHARED_RAM_BASE,
 	.sram_len		= SZ_128K,
diff --git a/arch/arm/mach-davinci/davinci.h b/arch/arm/mach-davinci/davinci.h
index 2ab5d577186f..2eebc4338802 100644
--- a/arch/arm/mach-davinci/davinci.h
+++ b/arch/arm/mach-davinci/davinci.h
@@ -53,6 +53,9 @@ extern void __iomem *davinci_sysmod_base;
 #define DAVINCI_SYSMOD_VIRT(x)	(davinci_sysmod_base + (x))
 void davinci_map_sysmod(void);
 
+#define DAVINCI_GPIO_BASE 0x01C67000
+int davinci_gpio_register(struct resource *res, int size, void *pdata);
+
 /* DM355 base addresses */
 #define DM355_ASYNC_EMIF_CONTROL_BASE	0x01e10000
 #define DM355_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
@@ -82,6 +85,7 @@ void dm355_init_spi0(unsigned chipselect_mask,
 		const struct spi_board_info *info, unsigned len);
 void dm355_init_asp1(u32 evt_enable, struct snd_platform_data *pdata);
 int dm355_init_video(struct vpfe_config *, struct vpbe_config *);
+int dm355_gpio_register(void);
 
 /* DM365 function declarations */
 void dm365_init(void);
@@ -92,11 +96,13 @@ void dm365_init_rtc(void);
 void dm365_init_spi0(unsigned chipselect_mask,
 			const struct spi_board_info *info, unsigned len);
 int dm365_init_video(struct vpfe_config *, struct vpbe_config *);
+int dm365_gpio_register(void);
 
 /* DM644x function declarations */
 void dm644x_init(void);
 void dm644x_init_asp(struct snd_platform_data *pdata);
 int dm644x_init_video(struct vpfe_config *, struct vpbe_config *);
+int dm644x_gpio_register(void);
 
 /* DM646x function declarations */
 void dm646x_init(void);
@@ -106,6 +112,7 @@ int dm646x_init_edma(struct edma_rsv_info *rsv);
 void dm646x_video_init(void);
 void dm646x_setup_vpif(struct vpif_display_config *,
 		       struct vpif_capture_config *);
+int dm646x_gpio_register(void);
 
 extern struct platform_device dm365_serial_device[];
 extern struct platform_device dm355_serial_device[];
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 2e473fefd71e..c46eccbbd512 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -665,6 +665,32 @@ int __init da8xx_register_lcdc(struct da8xx_lcdc_platform_data *pdata)
 	return platform_device_register(&da8xx_lcdc_device);
 }
 
+static struct resource da8xx_gpio_resources[] = {
+	{ /* registers */
+		.start	= DA8XX_GPIO_BASE,
+		.end	= DA8XX_GPIO_BASE + SZ_4K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{ /* interrupt */
+		.start	= IRQ_DA8XX_GPIO0,
+		.end	= IRQ_DA8XX_GPIO8,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device da8xx_gpio_device = {
+	.name		= "davinci_gpio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(da8xx_gpio_resources),
+	.resource	= da8xx_gpio_resources,
+};
+
+int __init da8xx_register_gpio(void *pdata)
+{
+	da8xx_gpio_device.dev.platform_data = pdata;
+	return platform_device_register(&da8xx_gpio_device);
+}
+
 static struct resource da8xx_mmcsd0_resources[] = {
 	{		/* registers */
 		.start	= DA8XX_MMCSD0_BASE,
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 111573c0aad1..3996e98f52fb 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -318,6 +318,19 @@ static void davinci_init_wdt(void)
 	platform_device_register(&davinci_wdt_device);
 }
 
+static struct platform_device davinci_gpio_device = {
+	.name	= "davinci_gpio",
+	.id	= -1,
+};
+
+int davinci_gpio_register(struct resource *res, int size, void *pdata)
+{
+	davinci_gpio_device.resource = res;
+	davinci_gpio_device.num_resources = size;
+	davinci_gpio_device.dev.platform_data = pdata;
+	return platform_device_register(&davinci_gpio_device);
+}
+
 /*-------------------------------------------------------------------------*/
 
 /*-------------------------------------------------------------------------*/
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 3eaa5f6b2160..ef9ff1fb6f52 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -13,8 +13,10 @@
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-
 #include <linux/spi/spi.h>
+#include <linux/platform_data/edma.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/spi-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -25,9 +27,6 @@
 #include <mach/time.h>
 #include <mach/serial.h>
 #include <mach/common.h>
-#include <linux/platform_data/spi-davinci.h>
-#include <mach/gpio-davinci.h>
-#include <linux/platform_data/edma.h>
 
 #include "davinci.h"
 #include "clock.h"
@@ -886,6 +885,30 @@ static struct platform_device dm355_vpbe_dev = {
 	},
 };
 
+static struct resource dm355_gpio_resources[] = {
+	{	/* registers */
+		.start	= DAVINCI_GPIO_BASE,
+		.end	= DAVINCI_GPIO_BASE + SZ_4K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{	/* interrupt */
+		.start	= IRQ_DM355_GPIOBNK0,
+		.end	= IRQ_DM355_GPIOBNK6,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct davinci_gpio_platform_data dm355_gpio_platform_data = {
+	.ngpio		= 104,
+	.intc_irq_num	= DAVINCI_N_AINTC_IRQ,
+};
+
+int __init dm355_gpio_register(void)
+{
+	return davinci_gpio_register(dm355_gpio_resources,
+				     sizeof(dm355_gpio_resources),
+				     &dm355_gpio_platform_data);
+}
 /*----------------------------------------------------------------------*/
 
 static struct map_desc dm355_io_desc[] = {
@@ -1005,10 +1028,6 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.intc_irq_prios		= dm355_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm355_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DAVINCI_GPIO_BASE,
-	.gpio_num		= 104,
-	.gpio_irq		= IRQ_DM355_GPIOBNK0,
 	.sram_dma		= 0x00010000,
 	.sram_len		= SZ_32K,
 };
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index c29e324eb0bb..1511a0680f9a 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -19,6 +19,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/platform_data/edma.h>
+#include <linux/platform_data/gpio-davinci.h>
+#include <linux/platform_data/keyscan-davinci.h>
+#include <linux/platform_data/spi-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -29,9 +32,6 @@
 #include <mach/time.h>
 #include <mach/serial.h>
 #include <mach/common.h>
-#include <linux/platform_data/keyscan-davinci.h>
-#include <linux/platform_data/spi-davinci.h>
-#include <mach/gpio-davinci.h>
 
 #include "davinci.h"
 #include "clock.h"
@@ -698,6 +698,32 @@ void __init dm365_init_spi0(unsigned chipselect_mask,
 	platform_device_register(&dm365_spi0_device);
 }
 
+static struct resource dm365_gpio_resources[] = {
+	{	/* registers */
+		.start	= DAVINCI_GPIO_BASE,
+		.end	= DAVINCI_GPIO_BASE + SZ_4K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{	/* interrupt */
+		.start	= IRQ_DM365_GPIO0,
+		.end	= IRQ_DM365_GPIO7,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct davinci_gpio_platform_data dm365_gpio_platform_data = {
+	.ngpio		= 104,
+	.intc_irq_num	= DAVINCI_N_AINTC_IRQ,
+	.gpio_unbanked	= 8,
+};
+
+int __init dm365_gpio_register(void)
+{
+	return davinci_gpio_register(dm365_gpio_resources,
+				     sizeof(dm365_gpio_resources),
+				     &dm365_gpio_platform_data);
+}
+
 static struct emac_platform_data dm365_emac_pdata = {
 	.ctrl_reg_offset	= DM365_EMAC_CNTRL_OFFSET,
 	.ctrl_mod_reg_offset	= DM365_EMAC_CNTRL_MOD_OFFSET,
@@ -1105,11 +1131,6 @@ static struct davinci_soc_info davinci_soc_info_dm365 = {
 	.intc_irq_prios		= dm365_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm365_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DAVINCI_GPIO_BASE,
-	.gpio_num		= 104,
-	.gpio_irq		= IRQ_DM365_GPIO0,
-	.gpio_unbanked		= 8,	/* really 16 ... skip muxed GPIOs */
 	.emac_pdata		= &dm365_emac_pdata,
 	.sram_dma		= 0x00010000,
 	.sram_len		= SZ_32K,
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 4f74682293d6..143a3217e8ef 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -13,6 +13,7 @@
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/edma.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -23,7 +24,6 @@
 #include <mach/time.h>
 #include <mach/serial.h>
 #include <mach/common.h>
-#include <mach/gpio-davinci.h>
 
 #include "davinci.h"
 #include "clock.h"
@@ -771,6 +771,30 @@ static struct platform_device dm644x_vpbe_dev = {
 	},
 };
 
+static struct resource dm644_gpio_resources[] = {
+	{	/* registers */
+		.start	= DAVINCI_GPIO_BASE,
+		.end	= DAVINCI_GPIO_BASE + SZ_4K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{	/* interrupt */
+		.start	= IRQ_GPIOBNK0,
+		.end	= IRQ_GPIOBNK4,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct davinci_gpio_platform_data dm644_gpio_platform_data = {
+	.ngpio		= 71,
+	.intc_irq_num	= DAVINCI_N_AINTC_IRQ,
+};
+
+int __init dm644x_gpio_register(void)
+{
+	return davinci_gpio_register(dm644_gpio_resources,
+				     sizeof(dm644_gpio_resources),
+				     &dm644_gpio_platform_data);
+}
 /*----------------------------------------------------------------------*/
 
 static struct map_desc dm644x_io_desc[] = {
@@ -897,10 +921,6 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.intc_irq_prios 	= dm644x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm644x_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DAVINCI_GPIO_BASE,
-	.gpio_num		= 71,
-	.gpio_irq		= IRQ_GPIOBNK0,
 	.emac_pdata		= &dm644x_emac_pdata,
 	.sram_dma		= 0x00008000,
 	.sram_len		= SZ_16K,
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 68f8d1f1aca1..2a73f299c1d0 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -14,6 +14,7 @@
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/edma.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <asm/mach/map.h>
 
@@ -24,7 +25,6 @@
 #include <mach/time.h>
 #include <mach/serial.h>
 #include <mach/common.h>
-#include <mach/gpio-davinci.h>
 
 #include "davinci.h"
 #include "clock.h"
@@ -748,6 +748,30 @@ static struct platform_device vpif_capture_dev = {
 	.num_resources	= ARRAY_SIZE(vpif_capture_resource),
 };
 
+static struct resource dm646x_gpio_resources[] = {
+	{	/* registers */
+		.start	= DAVINCI_GPIO_BASE,
+		.end	= DAVINCI_GPIO_BASE + SZ_4K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{	/* interrupt */
+		.start	= IRQ_DM646X_GPIOBNK0,
+		.end	= IRQ_DM646X_GPIOBNK2,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct davinci_gpio_platform_data dm646x_gpio_platform_data = {
+	.ngpio		= 43,
+	.intc_irq_num	= DAVINCI_N_AINTC_IRQ,
+};
+
+int __init dm646x_gpio_register(void)
+{
+	return davinci_gpio_register(dm646x_gpio_resources,
+				     sizeof(dm646x_gpio_resources),
+				     &dm646x_gpio_platform_data);
+}
 /*----------------------------------------------------------------------*/
 
 static struct map_desc dm646x_io_desc[] = {
@@ -874,10 +898,6 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.intc_irq_prios		= dm646x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm646x_timer_info,
-	.gpio_type		= GPIO_TYPE_DAVINCI,
-	.gpio_base		= DAVINCI_GPIO_BASE,
-	.gpio_num		= 43, /* Only 33 usable */
-	.gpio_irq		= IRQ_DM646X_GPIOBNK0,
 	.emac_pdata		= &dm646x_emac_pdata,
 	.sram_dma		= 0x10010000,
 	.sram_len		= SZ_32K,
diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h
index aae53072c0eb..39e58b48e826 100644
--- a/arch/arm/mach-davinci/include/mach/da8xx.h
+++ b/arch/arm/mach-davinci/include/mach/da8xx.h
@@ -97,6 +97,7 @@ int da8xx_register_mmcsd0(struct davinci_mmc_config *config);
 int da850_register_mmcsd1(struct davinci_mmc_config *config);
 void da8xx_register_mcasp(int id, struct snd_platform_data *pdata);
 int da8xx_register_rtc(void);
+int da8xx_register_gpio(void *pdata);
 int da850_register_cpufreq(char *async_clk);
 int da8xx_register_cpuidle(void);
 void __iomem *da8xx_get_mem_ctlr(void);
@@ -110,6 +111,8 @@ int da850_register_vpif_capture
 void da8xx_restart(enum reboot_mode mode, const char *cmd);
 void da8xx_rproc_reserve_cma(void);
 int da8xx_register_rproc(void);
+int da850_register_gpio(void);
+int da830_register_gpio(void);
 
 extern struct platform_device da8xx_serial_device[];
 extern struct emac_platform_data da8xx_emac_pdata;
diff --git a/arch/arm/mach-davinci/include/mach/gpio-davinci.h b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
deleted file mode 100644
index 1fdd1fd35448..000000000000
--- a/arch/arm/mach-davinci/include/mach/gpio-davinci.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * TI DaVinci GPIO Support
- *
- * Copyright (c) 2006 David Brownell
- * Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__DAVINCI_DAVINCI_GPIO_H
-#define	__DAVINCI_DAVINCI_GPIO_H
-
-#include <linux/io.h>
-#include <linux/spinlock.h>
-
-#include <asm-generic/gpio.h>
-
-#include <mach/irqs.h>
-#include <mach/common.h>
-
-#define DAVINCI_GPIO_BASE 0x01C67000
-
-enum davinci_gpio_type {
-	GPIO_TYPE_DAVINCI = 0,
-	GPIO_TYPE_TNETV107X,
-};
-
-/*
- * basic gpio routines
- *
- * board-specific init should be done by arch/.../.../board-XXX.c (maybe
- * initializing banks together) rather than boot loaders; kexec() won't
- * go through boot loaders.
- *
- * the gpio clock will be turned on when gpios are used, and you may also
- * need to pay attention to PINMUX registers to be sure those pins are
- * used as gpios, not with other peripherals.
- *
- * On-chip GPIOs are numbered 0..(DAVINCI_N_GPIO-1).  For documentation,
- * and maybe for later updates, code may write GPIO(N).  These may be
- * all 1.8V signals, all 3.3V ones, or a mix of the two.  A given chip
- * may not support all the GPIOs in that range.
- *
- * GPIOs can also be on external chips, numbered after the ones built-in
- * to the DaVinci chip.  For now, they won't be usable as IRQ sources.
- */
-#define	GPIO(X)		(X)		/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
-
-/* Convert GPIO signal to GPIO pin number */
-#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
-
-struct davinci_gpio_controller {
-	struct gpio_chip	chip;
-	int			irq_base;
-	spinlock_t		lock;
-	void __iomem		*regs;
-	void __iomem		*set_data;
-	void __iomem		*clr_data;
-	void __iomem		*in_data;
-};
-
-/* The __gpio_to_controller() and __gpio_mask() functions inline to constants
- * with constant parameters; or in outlined code they execute at runtime.
- *
- * You'd access the controller directly when reading or writing more than
- * one gpio value at a time, and to support wired logic where the value
- * being driven by the cpu need not match the value read back.
- *
- * These are NOT part of the cross-platform GPIO interface
- */
-static inline struct davinci_gpio_controller *
-__gpio_to_controller(unsigned gpio)
-{
-	struct davinci_gpio_controller *ctlrs = davinci_soc_info.gpio_ctlrs;
-	int index = gpio / 32;
-
-	if (!ctlrs || index >= davinci_soc_info.gpio_ctlrs_num)
-		return NULL;
-
-	return ctlrs + index;
-}
-
-static inline u32 __gpio_mask(unsigned gpio)
-{
-	return 1 << (gpio % 32);
-}
-
-#endif	/* __DAVINCI_DAVINCI_GPIO_H */
diff --git a/arch/arm/mach-davinci/include/mach/gpio.h b/arch/arm/mach-davinci/include/mach/gpio.h
deleted file mode 100644
index 960e9de47e1e..000000000000
--- a/arch/arm/mach-davinci/include/mach/gpio.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * TI DaVinci GPIO Support
- *
- * Copyright (c) 2006 David Brownell
- * Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__DAVINCI_GPIO_H
-#define	__DAVINCI_GPIO_H
-
-#include <asm-generic/gpio.h>
-
-#define __ARM_GPIOLIB_COMPLEX
-
-/* The inline versions use the static inlines in the driver header */
-#include "gpio-davinci.h"
-
-/*
- * The get/set/clear functions will inline when called with constant
- * parameters referencing built-in GPIOs, for low-overhead bitbanging.
- *
- * gpio_set_value() will inline only on traditional Davinci style controllers
- * with distinct set/clear registers.
- *
- * Otherwise, calls with variable parameters or referencing external
- * GPIOs (e.g. on GPIO expander chips) use outlined functions.
- */
-static inline void gpio_set_value(unsigned gpio, int value)
-{
-	if (__builtin_constant_p(value) && gpio < davinci_soc_info.gpio_num) {
-		struct davinci_gpio_controller *ctlr;
-		u32				mask;
-
-		ctlr = __gpio_to_controller(gpio);
-
-		if (ctlr->set_data != ctlr->clr_data) {
-			mask = __gpio_mask(gpio);
-			if (value)
-				__raw_writel(mask, ctlr->set_data);
-			else
-				__raw_writel(mask, ctlr->clr_data);
-			return;
-		}
-	}
-
-	__gpio_set_value(gpio, value);
-}
-
-/* Returns zero or nonzero; works for gpios configured as inputs OR
- * as outputs, at least for built-in GPIOs.
- *
- * NOTE: for built-in GPIOs, changes in reported values are synchronized
- * to the GPIO clock.  This is easily seen after calling gpio_set_value()
- * and then immediately gpio_get_value(), where the gpio_get_value() will
- * return the old value until the GPIO clock ticks and the new value gets
- * latched.
- */
-static inline int gpio_get_value(unsigned gpio)
-{
-	struct davinci_gpio_controller *ctlr;
-
-	if (!__builtin_constant_p(gpio) || gpio >= davinci_soc_info.gpio_num)
-		return __gpio_get_value(gpio);
-
-	ctlr = __gpio_to_controller(gpio);
-	return __gpio_mask(gpio) & __raw_readl(ctlr->in_data);
-}
-
-static inline int gpio_cansleep(unsigned gpio)
-{
-	if (__builtin_constant_p(gpio) && gpio < davinci_soc_info.gpio_num)
-		return 0;
-	else
-		return __gpio_cansleep(gpio);
-}
-
-static inline int irq_to_gpio(unsigned irq)
-{
-	/* don't support the reverse mapping */
-	return -ENOSYS;
-}
-
-#endif				/* __DAVINCI_GPIO_H */
diff --git a/arch/arm/mach-dove/board-dt.c b/arch/arm/mach-dove/board-dt.c
index ddb86631f16a..49fa9abd09da 100644
--- a/arch/arm/mach-dove/board-dt.c
+++ b/arch/arm/mach-dove/board-dt.c
@@ -19,35 +19,6 @@
 #include <plat/common.h>
 #include "common.h"
 
-/*
- * There are still devices that doesn't even know about DT,
- * get clock gates here and add a clock lookup.
- */
-static void __init dove_legacy_clk_init(void)
-{
-	struct device_node *np = of_find_compatible_node(NULL, NULL,
-					 "marvell,dove-gating-clock");
-	struct of_phandle_args clkspec;
-
-	clkspec.np = np;
-	clkspec.args_count = 1;
-
-	clkspec.args[0] = CLOCK_GATING_BIT_PCIE0;
-	orion_clkdev_add("0", "pcie",
-			 of_clk_get_from_provider(&clkspec));
-
-	clkspec.args[0] = CLOCK_GATING_BIT_PCIE1;
-	orion_clkdev_add("1", "pcie",
-			 of_clk_get_from_provider(&clkspec));
-}
-
-static void __init dove_dt_init_early(void)
-{
-	mvebu_mbus_init("marvell,dove-mbus",
-			BRIDGE_WINS_BASE, BRIDGE_WINS_SZ,
-			DOVE_MC_WINS_BASE, DOVE_MC_WINS_SZ);
-}
-
 static void __init dove_dt_init(void)
 {
 	pr_info("Dove 88AP510 SoC\n");
@@ -55,14 +26,7 @@ static void __init dove_dt_init(void)
 #ifdef CONFIG_CACHE_TAUROS2
 	tauros2_init(0);
 #endif
-	dove_setup_cpu_wins();
-
-	/* Setup clocks for legacy devices */
-	dove_legacy_clk_init();
-
-	/* Internal devices not ported to DT yet */
-	dove_pcie_init(1, 1);
-
+	BUG_ON(mvebu_mbus_dt_init());
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
@@ -73,7 +37,6 @@ static const char * const dove_dt_board_compat[] = {
 
 DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)")
 	.map_io		= dove_map_io,
-	.init_early	= dove_dt_init_early,
 	.init_machine	= dove_dt_init,
 	.restart	= dove_restart,
 	.dt_compat	= dove_dt_board_compat,
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index dba2173e70f3..8f1d327e0cd1 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -28,6 +28,7 @@ config CPU_S3C2410
 	select CPU_ARM920T
 	select CPU_LLSERIAL_S3C2410
 	select S3C2410_CLOCK
+	select S3C2410_DMA if S3C24XX_DMA
 	select ARM_S3C2410_CPUFREQ if ARM_S3C24XX_CPUFREQ
 	select S3C2410_PM if PM
 	select SAMSUNG_WDT_RESET
@@ -70,6 +71,7 @@ config CPU_S3C2442
 	select CPU_ARM920T
 	select CPU_LLSERIAL_S3C2440
 	select S3C2410_CLOCK
+	select S3C2410_DMA if S3C24XX_DMA
 	select S3C2410_PM if PM
 	help
 	  Support for S3C2442 Samsung Mobile CPU based systems.
@@ -148,7 +150,6 @@ config S3C2410_DMA_DEBUG
 config S3C2410_DMA
 	bool
 	depends on S3C24XX_DMA && (CPU_S3C2410 || CPU_S3C2442)
-	default y if CPU_S3C2410 || CPU_S3C2442
 	help
 	  DMA device selection for S3C2410 and compatible CPUs
 
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2412.c b/arch/arm/mach-s3c24xx/clock-s3c2412.c
index d8f253f2b486..11b3b28457bb 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2412.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2412.c
@@ -484,22 +484,22 @@ static struct clk init_clocks_disable[] = {
 
 static struct clk init_clocks[] = {
 	{
-		.name		= "dma",
+		.name		= "dma.0",
 		.parent		= &clk_h,
 		.enable		= s3c2412_clkcon_enable,
 		.ctrlbit	= S3C2412_CLKCON_DMA0,
 	}, {
-		.name		= "dma",
+		.name		= "dma.1",
 		.parent		= &clk_h,
 		.enable		= s3c2412_clkcon_enable,
 		.ctrlbit	= S3C2412_CLKCON_DMA1,
 	}, {
-		.name		= "dma",
+		.name		= "dma.2",
 		.parent		= &clk_h,
 		.enable		= s3c2412_clkcon_enable,
 		.ctrlbit	= S3C2412_CLKCON_DMA2,
 	}, {
-		.name		= "dma",
+		.name		= "dma.3",
 		.parent		= &clk_h,
 		.enable		= s3c2412_clkcon_enable,
 		.ctrlbit	= S3C2412_CLKCON_DMA3,
diff --git a/arch/arm/mach-s3c24xx/common-s3c2443.c b/arch/arm/mach-s3c24xx/common-s3c2443.c
index f6b9f2ef01bd..65d3eef73090 100644
--- a/arch/arm/mach-s3c24xx/common-s3c2443.c
+++ b/arch/arm/mach-s3c24xx/common-s3c2443.c
@@ -438,32 +438,32 @@ static struct clk init_clocks_off[] = {
 
 static struct clk init_clocks[] = {
 	{
-		.name		= "dma",
+		.name		= "dma.0",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA0,
 	}, {
-		.name		= "dma",
+		.name		= "dma.1",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA1,
 	}, {
-		.name		= "dma",
+		.name		= "dma.2",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA2,
 	}, {
-		.name		= "dma",
+		.name		= "dma.3",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA3,
 	}, {
-		.name		= "dma",
+		.name		= "dma.4",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA4,
 	}, {
-		.name		= "dma",
+		.name		= "dma.5",
 		.parent		= &clk_h,
 		.enable		= s3c2443_clkcon_enable_h,
 		.ctrlbit	= S3C2443_HCLKCON_DMA5,
diff --git a/arch/arm/mach-s3c24xx/common.c b/arch/arm/mach-s3c24xx/common.c
index 457261c98433..4adaa4b43ffe 100644
--- a/arch/arm/mach-s3c24xx/common.c
+++ b/arch/arm/mach-s3c24xx/common.c
@@ -31,6 +31,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/platform_data/dma-s3c24xx.h>
 
 #include <mach/hardware.h>
 #include <mach/regs-clock.h>
@@ -44,6 +45,7 @@
 
 #include <mach/regs-gpio.h>
 #include <plat/regs-serial.h>
+#include <mach/dma.h>
 
 #include <plat/cpu.h>
 #include <plat/devs.h>
@@ -329,3 +331,207 @@ void __init_or_cpufreq s3c24xx_setup_clocks(unsigned long fclk,
 	clk_p.rate = pclk;
 	clk_f.rate = fclk;
 }
+
+#if defined(CONFIG_CPU_S3C2410) || defined(CONFIG_CPU_S3C2412) || \
+	defined(CONFIG_CPU_S3C2440) || defined(CONFIG_CPU_S3C2442)
+static struct resource s3c2410_dma_resource[] = {
+	[0] = DEFINE_RES_MEM(S3C24XX_PA_DMA, S3C24XX_SZ_DMA),
+	[1] = DEFINE_RES_IRQ(IRQ_DMA0),
+	[2] = DEFINE_RES_IRQ(IRQ_DMA1),
+	[3] = DEFINE_RES_IRQ(IRQ_DMA2),
+	[4] = DEFINE_RES_IRQ(IRQ_DMA3),
+};
+#endif
+
+#if defined(CONFIG_CPU_S3C2410) || defined(CONFIG_CPU_S3C2442)
+static struct s3c24xx_dma_channel s3c2410_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 2), },
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2410_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2410_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2410_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2410_dma_platdata,
+	},
+};
+#endif
+
+#ifdef CONFIG_CPU_S3C2412
+static struct s3c24xx_dma_channel s3c2412_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, 17 },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, 18 },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, 10 },
+	[DMACH_SPI0_RX] = { S3C24XX_DMA_APB, true, 1 },
+	[DMACH_SPI0_TX] = { S3C24XX_DMA_APB, true, 0 },
+	[DMACH_SPI1_RX] = { S3C24XX_DMA_APB, true, 3 },
+	[DMACH_SPI1_TX] = { S3C24XX_DMA_APB, true, 2 },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, 19 },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, 21 },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, 23 },
+	[DMACH_UART0_SRC2] = { S3C24XX_DMA_APB, true, 20 },
+	[DMACH_UART1_SRC2] = { S3C24XX_DMA_APB, true, 22 },
+	[DMACH_UART2_SRC2] = { S3C24XX_DMA_APB, true, 24 },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, 9 },
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, 5 },
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, 4 },
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, 13 },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, 14 },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, 15 },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, 16 },
+};
+
+static struct s3c24xx_dma_platdata s3c2412_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2412_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2412_device_dma = {
+	.name		= "s3c2412-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2412_dma_platdata,
+	},
+};
+#endif
+
+#if defined(CONFIG_CPU_S3C2440)
+static struct s3c24xx_dma_channel s3c2440_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(6, 1) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 0) |
+						   S3C24XX_DMA_CHANREQ(0, 2),
+	},
+	[DMACH_PCM_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 0) |
+						  S3C24XX_DMA_CHANREQ(5, 2),
+	},
+	[DMACH_PCM_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 1) |
+						  S3C24XX_DMA_CHANREQ(6, 3),
+	},
+	[DMACH_MIC_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 2) |
+						  S3C24XX_DMA_CHANREQ(5, 3),
+	},
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2440_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2440_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2440_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2440_dma_platdata,
+	},
+};
+#endif
+
+#if defined(CONFIG_CPUS_3C2443) || defined(CONFIG_CPU_S3C2416)
+static struct resource s3c2443_dma_resource[] = {
+	[0] = DEFINE_RES_MEM(S3C24XX_PA_DMA, S3C24XX_SZ_DMA),
+	[1] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA0),
+	[2] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA1),
+	[3] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA2),
+	[4] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA3),
+	[5] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA4),
+	[6] = DEFINE_RES_IRQ(IRQ_S3C2443_DMA5),
+};
+
+static struct s3c24xx_dma_channel s3c2443_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, 17 },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, 18 },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, 10 },
+	[DMACH_SPI0_RX] = { S3C24XX_DMA_APB, true, 1 },
+	[DMACH_SPI0_TX] = { S3C24XX_DMA_APB, true, 0 },
+	[DMACH_SPI1_RX] = { S3C24XX_DMA_APB, true, 3 },
+	[DMACH_SPI1_TX] = { S3C24XX_DMA_APB, true, 2 },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, 19 },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, 21 },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, 23 },
+	[DMACH_UART3] = { S3C24XX_DMA_APB, true, 25 },
+	[DMACH_UART0_SRC2] = { S3C24XX_DMA_APB, true, 20 },
+	[DMACH_UART1_SRC2] = { S3C24XX_DMA_APB, true, 22 },
+	[DMACH_UART2_SRC2] = { S3C24XX_DMA_APB, true, 24 },
+	[DMACH_UART3_SRC2] = { S3C24XX_DMA_APB, true, 26 },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, 9 },
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, 5 },
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, 4 },
+	[DMACH_PCM_IN] = { S3C24XX_DMA_APB, true, 28 },
+	[DMACH_PCM_OUT] = { S3C24XX_DMA_APB, true, 27 },
+	[DMACH_MIC_IN] = { S3C24XX_DMA_APB, true, 29 },
+};
+
+static struct s3c24xx_dma_platdata s3c2443_dma_platdata = {
+	.num_phy_channels = 6,
+	.channels = s3c2443_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2443_device_dma = {
+	.name		= "s3c2443-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2443_dma_resource),
+	.resource	= s3c2443_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2443_dma_platdata,
+	},
+};
+#endif
diff --git a/arch/arm/mach-s3c24xx/common.h b/arch/arm/mach-s3c24xx/common.h
index 84b280654f4c..e46c10417216 100644
--- a/arch/arm/mach-s3c24xx/common.h
+++ b/arch/arm/mach-s3c24xx/common.h
@@ -109,4 +109,9 @@ extern void s3c2443_init_irq(void);
 
 extern struct syscore_ops s3c24xx_irq_syscore_ops;
 
+extern struct platform_device s3c2410_device_dma;
+extern struct platform_device s3c2412_device_dma;
+extern struct platform_device s3c2440_device_dma;
+extern struct platform_device s3c2443_device_dma;
+
 #endif /* __ARCH_ARM_MACH_S3C24XX_COMMON_H */
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index a45fcd8ccf79..43c23e220f5b 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -466,6 +466,7 @@ static struct platform_device *jive_devices[] __initdata = {
 	&jive_device_wm8750,
 	&s3c_device_nand,
 	&s3c_device_usbgadget,
+	&s3c2412_device_dma,
 };
 
 static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2413.c b/arch/arm/mach-s3c24xx/mach-smdk2413.c
index 8146e920f10d..c9d31ef28dd1 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2413.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2413.c
@@ -89,6 +89,7 @@ static struct platform_device *smdk2413_devices[] __initdata = {
 	&s3c_device_i2c0,
 	&s3c_device_iis,
 	&s3c_device_usbgadget,
+	&s3c2412_device_dma,
 };
 
 static void __init smdk2413_fixup(struct tag *tags, char **cmdline,
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2416.c b/arch/arm/mach-s3c24xx/mach-smdk2416.c
index cb46847c66b4..f88e672ad1e4 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2416.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2416.c
@@ -215,6 +215,7 @@ static struct platform_device *smdk2416_devices[] __initdata = {
 	&s3c_device_hsmmc0,
 	&s3c_device_hsmmc1,
 	&s3c_device_usb_hsudc,
+	&s3c2443_device_dma,
 };
 
 static void __init smdk2416_map_io(void)
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2443.c b/arch/arm/mach-s3c24xx/mach-smdk2443.c
index 9435c3bef18a..d9933fcc6cc8 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2443.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2443.c
@@ -115,6 +115,7 @@ static struct platform_device *smdk2443_devices[] __initdata = {
 #ifdef CONFIG_SND_SOC_SMDK2443_WM9710
 	&s3c_device_ac97,
 #endif
+	&s3c2443_device_dma,
 };
 
 static void __init smdk2443_map_io(void)
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index b66588428ec9..f7ec9c550787 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -126,6 +126,7 @@ static struct platform_device *vstms_devices[] __initdata = {
 	&s3c_device_iis,
 	&s3c_device_rtc,
 	&s3c_device_nand,
+	&s3c2412_device_dma,
 };
 
 static void __init vstms_fixup(struct tag *tags, char **cmdline,
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 25f40c9b7f62..99a3590f0349 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -32,6 +32,7 @@
 #include <linux/ioport.h>
 #include <linux/platform_data/s3c-hsudc.h>
 #include <linux/platform_data/s3c-hsotg.h>
+#include <linux/platform_data/dma-s3c24xx.h>
 
 #include <media/s5p_hdmi.h>
 
@@ -1465,8 +1466,10 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 	pd.num_cs = num_cs;
 	pd.src_clk_nr = src_clk_nr;
 	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio;
-#ifdef CONFIG_PL330_DMA
+#if defined(CONFIG_PL330_DMA)
 	pd.filter = pl330_filter;
+#elif defined(CONFIG_S3C24XX_DMAC)
+	pd.filter = s3c24xx_dma_filter;
 #endif
 
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0);
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 200926699778..bb5b90e8e768 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -18,11 +18,21 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #include <asm/cacheflush.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
 #include <asm/smp_plat.h>
 
+#define DRIVER_NAME		"CCI-400"
+#define DRIVER_NAME_PMU		DRIVER_NAME " PMU"
+#define PMU_NAME		"CCI_400"
+
 #define CCI_PORT_CTRL		0x0
 #define CCI_CTRL_STATUS		0xc
 
@@ -54,6 +64,568 @@ static unsigned int nb_cci_ports;
 static void __iomem *cci_ctrl_base;
 static unsigned long cci_ctrl_phys;
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+#define CCI_PMCR		0x0100
+#define CCI_PID2		0x0fe8
+
+#define CCI_PMCR_CEN		0x00000001
+#define CCI_PMCR_NCNT_MASK	0x0000f800
+#define CCI_PMCR_NCNT_SHIFT	11
+
+#define CCI_PID2_REV_MASK	0xf0
+#define CCI_PID2_REV_SHIFT	4
+
+/* Port ids */
+#define CCI_PORT_S0	0
+#define CCI_PORT_S1	1
+#define CCI_PORT_S2	2
+#define CCI_PORT_S3	3
+#define CCI_PORT_S4	4
+#define CCI_PORT_M0	5
+#define CCI_PORT_M1	6
+#define CCI_PORT_M2	7
+
+#define CCI_REV_R0		0
+#define CCI_REV_R1		1
+#define CCI_REV_R0_P4		4
+#define CCI_REV_R1_P2		6
+
+#define CCI_PMU_EVT_SEL		0x000
+#define CCI_PMU_CNTR		0x004
+#define CCI_PMU_CNTR_CTRL	0x008
+#define CCI_PMU_OVRFLW		0x00c
+
+#define CCI_PMU_OVRFLW_FLAG	1
+
+#define CCI_PMU_CNTR_BASE(idx)	((idx) * SZ_4K)
+
+/*
+ * Instead of an event id to monitor CCI cycles, a dedicated counter is
+ * provided. Use 0xff to represent CCI cycles and hope that no future revisions
+ * make use of this event in hardware.
+ */
+enum cci400_perf_events {
+	CCI_PMU_CYCLES = 0xff
+};
+
+#define CCI_PMU_EVENT_MASK		0xff
+#define CCI_PMU_EVENT_SOURCE(event)	((event >> 5) & 0x7)
+#define CCI_PMU_EVENT_CODE(event)	(event & 0x1f)
+
+#define CCI_PMU_MAX_HW_EVENTS 5   /* CCI PMU has 4 counters + 1 cycle counter */
+
+#define CCI_PMU_CYCLE_CNTR_IDX		0
+#define CCI_PMU_CNTR0_IDX		1
+#define CCI_PMU_CNTR_LAST(cci_pmu)	(CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1)
+
+/*
+ * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
+ * ports and bits 4:0 are event codes. There are different event codes
+ * associated with each port type.
+ *
+ * Additionally, the range of events associated with the port types changed
+ * between Rev0 and Rev1.
+ *
+ * The constants below define the range of valid codes for each port type for
+ * the different revisions and are used to validate the event to be monitored.
+ */
+
+#define CCI_REV_R0_SLAVE_PORT_MIN_EV	0x00
+#define CCI_REV_R0_SLAVE_PORT_MAX_EV	0x13
+#define CCI_REV_R0_MASTER_PORT_MIN_EV	0x14
+#define CCI_REV_R0_MASTER_PORT_MAX_EV	0x1a
+
+#define CCI_REV_R1_SLAVE_PORT_MIN_EV	0x00
+#define CCI_REV_R1_SLAVE_PORT_MAX_EV	0x14
+#define CCI_REV_R1_MASTER_PORT_MIN_EV	0x00
+#define CCI_REV_R1_MASTER_PORT_MAX_EV	0x11
+
+struct pmu_port_event_ranges {
+	u8 slave_min;
+	u8 slave_max;
+	u8 master_min;
+	u8 master_max;
+};
+
+static struct pmu_port_event_ranges port_event_range[] = {
+	[CCI_REV_R0] = {
+		.slave_min = CCI_REV_R0_SLAVE_PORT_MIN_EV,
+		.slave_max = CCI_REV_R0_SLAVE_PORT_MAX_EV,
+		.master_min = CCI_REV_R0_MASTER_PORT_MIN_EV,
+		.master_max = CCI_REV_R0_MASTER_PORT_MAX_EV,
+	},
+	[CCI_REV_R1] = {
+		.slave_min = CCI_REV_R1_SLAVE_PORT_MIN_EV,
+		.slave_max = CCI_REV_R1_SLAVE_PORT_MAX_EV,
+		.master_min = CCI_REV_R1_MASTER_PORT_MIN_EV,
+		.master_max = CCI_REV_R1_MASTER_PORT_MAX_EV,
+	},
+};
+
+struct cci_pmu_drv_data {
+	void __iomem *base;
+	struct arm_pmu *cci_pmu;
+	int nr_irqs;
+	int irqs[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long active_irqs;
+	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
+	struct pmu_port_event_ranges *port_ranges;
+	struct pmu_hw_events hw_events;
+};
+static struct cci_pmu_drv_data *pmu;
+
+static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++)
+		if (irq == irqs[i])
+			return true;
+
+	return false;
+}
+
+static int probe_cci_revision(void)
+{
+	int rev;
+	rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
+	rev >>= CCI_PID2_REV_SHIFT;
+
+	if (rev <= CCI_REV_R0_P4)
+		return CCI_REV_R0;
+	else if (rev <= CCI_REV_R1_P2)
+		return CCI_REV_R1;
+
+	return -ENOENT;
+}
+
+static struct pmu_port_event_ranges *port_range_by_rev(void)
+{
+	int rev = probe_cci_revision();
+
+	if (rev < 0)
+		return NULL;
+
+	return &port_event_range[rev];
+}
+
+static int pmu_is_valid_slave_event(u8 ev_code)
+{
+	return pmu->port_ranges->slave_min <= ev_code &&
+		ev_code <= pmu->port_ranges->slave_max;
+}
+
+static int pmu_is_valid_master_event(u8 ev_code)
+{
+	return pmu->port_ranges->master_min <= ev_code &&
+		ev_code <= pmu->port_ranges->master_max;
+}
+
+static int pmu_validate_hw_event(u8 hw_event)
+{
+	u8 ev_source = CCI_PMU_EVENT_SOURCE(hw_event);
+	u8 ev_code = CCI_PMU_EVENT_CODE(hw_event);
+
+	switch (ev_source) {
+	case CCI_PORT_S0:
+	case CCI_PORT_S1:
+	case CCI_PORT_S2:
+	case CCI_PORT_S3:
+	case CCI_PORT_S4:
+		/* Slave Interface */
+		if (pmu_is_valid_slave_event(ev_code))
+			return hw_event;
+		break;
+	case CCI_PORT_M0:
+	case CCI_PORT_M1:
+	case CCI_PORT_M2:
+		/* Master Interface */
+		if (pmu_is_valid_master_event(ev_code))
+			return hw_event;
+		break;
+	}
+
+	return -ENOENT;
+}
+
+static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx)
+{
+	return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
+		idx <= CCI_PMU_CNTR_LAST(cci_pmu);
+}
+
+static u32 pmu_read_register(int idx, unsigned int offset)
+{
+	return readl_relaxed(pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+}
+
+static void pmu_write_register(u32 value, int idx, unsigned int offset)
+{
+	return writel_relaxed(value, pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+}
+
+static void pmu_disable_counter(int idx)
+{
+	pmu_write_register(0, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static void pmu_enable_counter(int idx)
+{
+	pmu_write_register(1, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static void pmu_set_event(int idx, unsigned long event)
+{
+	event &= CCI_PMU_EVENT_MASK;
+	pmu_write_register(event, idx, CCI_PMU_EVT_SEL);
+}
+
+static u32 pmu_get_max_counters(void)
+{
+	u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
+		      CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
+
+	/* add 1 for cycle counter */
+	return n_cnts + 1;
+}
+
+static struct pmu_hw_events *pmu_get_hw_events(void)
+{
+	return &pmu->hw_events;
+}
+
+static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_event = &event->hw;
+	unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK;
+	int idx;
+
+	if (cci_event == CCI_PMU_CYCLES) {
+		if (test_and_set_bit(CCI_PMU_CYCLE_CNTR_IDX, hw->used_mask))
+			return -EAGAIN;
+
+		return CCI_PMU_CYCLE_CNTR_IDX;
+	}
+
+	for (idx = CCI_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+static int pmu_map_event(struct perf_event *event)
+{
+	int mapping;
+	u8 config = event->attr.config & CCI_PMU_EVENT_MASK;
+
+	if (event->attr.type < PERF_TYPE_MAX)
+		return -ENOENT;
+
+	if (config == CCI_PMU_CYCLES)
+		mapping = config;
+	else
+		mapping = pmu_validate_hw_event(config);
+
+	return mapping;
+}
+
+static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+{
+	int i;
+	struct platform_device *pmu_device = cci_pmu->plat_device;
+
+	if (unlikely(!pmu_device))
+		return -ENODEV;
+
+	if (pmu->nr_irqs < 1) {
+		dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Register all available CCI PMU interrupts. In the interrupt handler
+	 * we iterate over the counters checking for interrupt source (the
+	 * overflowing counter) and clear it.
+	 *
+	 * This should allow handling of non-unique interrupt for the counters.
+	 */
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		int err = request_irq(pmu->irqs[i], handler, IRQF_SHARED,
+				"arm-cci-pmu", cci_pmu);
+		if (err) {
+			dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+				pmu->irqs[i]);
+			return err;
+		}
+
+		set_bit(i, &pmu->active_irqs);
+	}
+
+	return 0;
+}
+
+static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long flags;
+	struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	int idx, handled = IRQ_NONE;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	regs = get_irq_regs();
+	/*
+	 * Iterate over counters and update the corresponding perf events.
+	 * This should work regardless of whether we have per-counter overflow
+	 * interrupt or a combined overflow interrupt.
+	 */
+	for (idx = CCI_PMU_CYCLE_CNTR_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
+		struct perf_event *event = events->events[idx];
+		struct hw_perf_event *hw_counter;
+
+		if (!event)
+			continue;
+
+		hw_counter = &event->hw;
+
+		/* Did this counter overflow? */
+		if (!pmu_read_register(idx, CCI_PMU_OVRFLW) & CCI_PMU_OVRFLW_FLAG)
+			continue;
+
+		pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
+
+		handled = IRQ_HANDLED;
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hw_counter->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cci_pmu->disable(event);
+	}
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
+static void pmu_free_irq(struct arm_pmu *cci_pmu)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &pmu->active_irqs))
+			continue;
+
+		free_irq(pmu->irqs[i], cci_pmu);
+	}
+}
+
+static void pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Configure the event to count, unless you are counting cycles */
+	if (idx != CCI_PMU_CYCLE_CNTR_IDX)
+		pmu_set_event(idx, hw_counter->config_base);
+
+	pmu_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void pmu_disable_event(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	pmu_disable_counter(idx);
+}
+
+static void pmu_start(struct arm_pmu *cci_pmu)
+{
+	u32 val;
+	unsigned long flags;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void pmu_stop(struct arm_pmu *cci_pmu)
+{
+	u32 val;
+	unsigned long flags;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u32 pmu_read_counter(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+	u32 value;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+	value = pmu_read_register(idx, CCI_PMU_CNTR);
+
+	return value;
+}
+
+static void pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+	else
+		pmu_write_register(value, idx, CCI_PMU_CNTR);
+}
+
+static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev)
+{
+	*cci_pmu = (struct arm_pmu){
+		.name             = PMU_NAME,
+		.max_period       = (1LLU << 32) - 1,
+		.get_hw_events    = pmu_get_hw_events,
+		.get_event_idx    = pmu_get_event_idx,
+		.map_event        = pmu_map_event,
+		.request_irq      = pmu_request_irq,
+		.handle_irq       = pmu_handle_irq,
+		.free_irq         = pmu_free_irq,
+		.enable           = pmu_enable_event,
+		.disable          = pmu_disable_event,
+		.start            = pmu_start,
+		.stop             = pmu_stop,
+		.read_counter     = pmu_read_counter,
+		.write_counter    = pmu_write_counter,
+	};
+
+	cci_pmu->plat_device = pdev;
+	cci_pmu->num_events = pmu_get_max_counters();
+
+	return armpmu_register(cci_pmu, -1);
+}
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+	{
+		.compatible = "arm,cci-400-pmu",
+	},
+	{},
+};
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int i, ret, irq;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pmu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pmu->base))
+		return -ENOMEM;
+
+	/*
+	 * CCI PMU has 5 overflow signals - one per counter; but some may be tied
+	 * together to a common interrupt.
+	 */
+	pmu->nr_irqs = 0;
+	for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			break;
+
+		if (is_duplicate_irq(irq, pmu->irqs, pmu->nr_irqs))
+			continue;
+
+		pmu->irqs[pmu->nr_irqs++] = irq;
+	}
+
+	/*
+	 * Ensure that the device tree has as many interrupts as the number
+	 * of counters.
+	 */
+	if (i < CCI_PMU_MAX_HW_EVENTS) {
+		dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
+			i, CCI_PMU_MAX_HW_EVENTS);
+		return -EINVAL;
+	}
+
+	pmu->port_ranges = port_range_by_rev();
+	if (!pmu->port_ranges) {
+		dev_warn(&pdev->dev, "CCI PMU version not supported\n");
+		return -EINVAL;
+	}
+
+	pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL);
+	if (!pmu->cci_pmu)
+		return -ENOMEM;
+
+	pmu->hw_events.events = pmu->events;
+	pmu->hw_events.used_mask = pmu->used_mask;
+	raw_spin_lock_init(&pmu->hw_events.pmu_lock);
+
+	ret = cci_pmu_init(pmu->cci_pmu, pdev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int cci_platform_probe(struct platform_device *pdev)
+{
+	if (!cci_probed())
+		return -ENODEV;
+
+	return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+}
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 struct cpu_port {
 	u64 mpidr;
 	u32 port;
@@ -120,7 +692,7 @@ int cci_ace_get_port(struct device_node *dn)
 }
 EXPORT_SYMBOL_GPL(cci_ace_get_port);
 
-static void __init cci_ace_init_ports(void)
+static void cci_ace_init_ports(void)
 {
 	int port, cpu;
 	struct device_node *cpun;
@@ -386,7 +958,7 @@ static const struct of_device_id arm_cci_ctrl_if_matches[] = {
 	{},
 };
 
-static int __init cci_probe(void)
+static int cci_probe(void)
 {
 	struct cci_nb_ports const *cci_config;
 	int ret, i, nb_ace = 0, nb_ace_lite = 0;
@@ -490,7 +1062,7 @@ static int __init cci_probe(void)
 static int cci_init_status = -EAGAIN;
 static DEFINE_MUTEX(cci_probing);
 
-static int __init cci_init(void)
+static int cci_init(void)
 {
 	if (cci_init_status != -EAGAIN)
 		return cci_init_status;
@@ -502,18 +1074,55 @@ static int __init cci_init(void)
 	return cci_init_status;
 }
 
+#ifdef CONFIG_HW_PERF_EVENTS
+static struct platform_driver cci_pmu_driver = {
+	.driver = {
+		   .name = DRIVER_NAME_PMU,
+		   .of_match_table = arm_cci_pmu_matches,
+		  },
+	.probe = cci_pmu_probe,
+};
+
+static struct platform_driver cci_platform_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .of_match_table = arm_cci_matches,
+		  },
+	.probe = cci_platform_probe,
+};
+
+static int __init cci_platform_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&cci_pmu_driver);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&cci_platform_driver);
+}
+
+#else
+
+static int __init cci_platform_init(void)
+{
+	return 0;
+}
+
+#endif
 /*
  * To sort out early init calls ordering a helper function is provided to
  * check if the CCI driver has beed initialized. Function check if the driver
  * has been initialized, if not it calls the init function that probes
  * the driver and updates the return value.
  */
-bool __init cci_probed(void)
+bool cci_probed(void)
 {
 	return cci_init() == 0;
 }
 EXPORT_SYMBOL_GPL(cci_probed);
 
 early_initcall(cci_init);
+core_initcall(cci_platform_init);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ARM CCI support");
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 56fe803adcb1..c61a6eccf169 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -154,6 +154,18 @@ config TEGRA20_APB_DMA
 	  This DMA controller transfers data from memory to peripheral fifo
 	  or vice versa. It does not support memory to memory data transfer.
 
+config S3C24XX_DMAC
+	tristate "Samsung S3C24XX DMA support"
+	depends on ARCH_S3C24XX && !S3C24XX_DMA
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Samsung S3C24XX DMA controller driver. The
+	  DMA controller is having multiple DMA channels which can be
+	  configured for different peripherals like audio, UART, SPI.
+	  The DMA controller can transfer data from memory to peripheral,
+	  periphal to memory, periphal to periphal and memory to memory.
+
 source "drivers/dma/sh/Kconfig"
 
 config COH901318
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index db89035b3626..0ce2da97e429 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 000000000000..4cb127978636
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1350 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS	8
+
+#define S3C24XX_DISRC			0x00
+#define S3C24XX_DISRCC			0x04
+#define S3C24XX_DISRCC_INC_INCREMENT	0
+#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB		0
+#define S3C24XX_DISRCC_LOC_APB		BIT(1)
+
+#define S3C24XX_DIDST			0x08
+#define S3C24XX_DIDSTC			0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT	0
+#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB		0
+#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0		0
+#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
+
+#define S3C24XX_DCON			0x10
+
+#define S3C24XX_DCON_TC_MASK		0xfffff
+#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
+#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
+#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT		20
+#define S3C24XX_DCON_AUTORELOAD		0
+#define S3C24XX_DCON_NORELOAD		BIT(22)
+#define S3C24XX_DCON_HWTRIG		BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT	24
+#define S3C24XX_DCON_SERV_SINGLE	0
+#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT		0
+#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
+#define S3C24XX_DCON_INT		BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK		0
+#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
+#define S3C24XX_DCON_DEMAND		0
+#define S3C24XX_DCON_HANDSHAKE		BIT(31)
+
+#define S3C24XX_DSTAT			0x14
+#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
+
+#define S3C24XX_DMASKTRIG		0x20
+#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
+#define S3C24XX_DMASKTRIG_ON		BIT(1)
+#define S3C24XX_DMASKTRIG_STOP		BIT(2)
+
+#define S3C24XX_DMAREQSEL		0x24
+#define S3C24XX_DMAREQSEL_HW		BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH		4
+#define S3C24XX_CHANSEL_VALID		BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK	7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer requestselection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+	int stride;
+	bool has_reqsel;
+	bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+	S3C24XX_DMA_CHAN_IDLE,
+	S3C24XX_DMA_CHAN_RUNNING,
+	S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transfered
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ */
+struct s3c24xx_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	struct list_head *at;
+	u8 width;
+	u32 disrcc;
+	u32 didstc;
+	u32 dcon;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for the this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physicalchannel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+	unsigned int			id;
+	bool				valid;
+	void __iomem			*base;
+	unsigned int			irq;
+	struct clk			*clk;
+	spinlock_t			lock;
+	struct s3c24xx_dma_chan		*serving;
+	struct s3c24xx_dma_engine	*host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrappped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @at: active transaction on this channel
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+	int id;
+	const char *name;
+	struct virt_dma_chan vc;
+	struct s3c24xx_dma_phy *phy;
+	struct dma_slave_config cfg;
+	struct s3c24xx_txd *at;
+	struct s3c24xx_dma_engine *host;
+	enum s3c24xx_dma_chan_state state;
+	bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+	struct platform_device			*pdev;
+	const struct s3c24xx_dma_platdata	*pdata;
+	struct soc_data				*sdata;
+	void __iomem				*base;
+	struct dma_device			slave;
+	struct dma_device			memcpy;
+	struct s3c24xx_dma_phy			*phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+	return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+				  struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	int phyvalid;
+
+	/* every phy is valid for memcopy channels */
+	if (!s3cchan->slave)
+		return true;
+
+	/* On newer variants all phys can be used for all virtual channels */
+	if (s3cdma->sdata->has_reqsel)
+		return true;
+
+	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata;
+	struct s3c24xx_dma_phy *phy = NULL;
+	unsigned long flags;
+	int i;
+	int ret;
+
+	if (s3cchan->slave)
+		cdata = &pdata->channels[s3cchan->id];
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		phy = &s3cdma->phy_chans[i];
+
+		if (!phy->valid)
+			continue;
+
+		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+			continue;
+
+		spin_lock_irqsave(&phy->lock, flags);
+
+		if (!phy->serving) {
+			phy->serving = s3cchan;
+			spin_unlock_irqrestore(&phy->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&phy->lock, flags);
+	}
+
+	/* No physical channel available, cope with it */
+	if (i == s3cdma->pdata->num_phy_channels) {
+		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+		return NULL;
+	}
+
+	/* start the phy clock */
+	if (s3cdma->sdata->has_clocks) {
+		ret = clk_enable(phy->clk);
+		if (ret) {
+			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+				phy->id, ret);
+			phy->serving = NULL;
+			return NULL;
+		}
+	}
+
+	return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+	if (s3cdma->sdata->has_clocks)
+		clk_disable(phy->clk);
+
+	phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct s3c24xx_txd *txd = s3cchan->at;
+	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+	return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct s3c24xx_dma_chan *s3cchan,
+				  struct dma_slave_config *config)
+{
+	if (!s3cchan->slave)
+		return -EINVAL;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	s3cchan->cfg = *config;
+
+	return 0;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		INIT_LIST_HEAD(&txd->dsg_list);
+		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+	}
+
+	return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_sg *dsg, *_dsg;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+				       struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+	u32 dcon = txd->dcon;
+	u32 val;
+
+	/* transfer-size and -count from len and width */
+	switch (txd->width) {
+	case 1:
+		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+		break;
+	case 2:
+		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+		break;
+	case 4:
+		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+		break;
+	}
+
+	if (s3cchan->slave) {
+		struct s3c24xx_dma_channel *cdata =
+					&pdata->channels[s3cchan->id];
+
+		if (s3cdma->sdata->has_reqsel) {
+			writel_relaxed((cdata->chansel << 1) |
+							S3C24XX_DMAREQSEL_HW,
+					phy->base + S3C24XX_DMAREQSEL);
+		} else {
+			int csel = cdata->chansel >> (phy->id *
+							S3C24XX_CHANSEL_WIDTH);
+
+			csel &= S3C24XX_CHANSEL_REQ_MASK;
+			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+			dcon |= S3C24XX_DCON_HWTRIG;
+		}
+	} else {
+		if (s3cdma->sdata->has_reqsel)
+			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+	}
+
+	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
+	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+	val &= ~S3C24XX_DMASKTRIG_STOP;
+	val |= S3C24XX_DMASKTRIG_ON;
+
+	/* trigger the dma operation for memcpy transfers */
+	if (!s3cchan->slave)
+		val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+	writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+	list_del(&txd->vd.node);
+
+	s3cchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (s3c24xx_dma_phy_busy(phy))
+		cpu_relax();
+
+	/* point to the first element of the sg list */
+	txd->at = txd->dsg_list.next;
+	s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
+				struct s3c24xx_dma_chan *s3cchan)
+{
+	LIST_HEAD(head);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy;
+
+	phy = s3c24xx_dma_get_phy(s3cchan);
+	if (!phy) {
+		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+			s3cchan->name);
+		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+	struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	phy->serving = s3cchan;
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_chan *p, *next;
+
+retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next) {
+		list_for_each_entry(p, &s3cdma->slave.channels,
+				    vc.chan.device_node)
+			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == S3C24XX_DMA_CHAN_WAITING;
+		if (success)
+			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		s3c24xx_dma_put_phy(s3cchan->phy);
+	}
+
+	s3cchan->phy = NULL;
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_unmap_buffers(struct s3c24xx_txd *txd)
+{
+	struct device *dev = txd->vd.tx.chan->device->dev;
+	struct s3c24xx_sg *dsg;
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		else {
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		}
+	}
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+		else
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+	}
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+	if (!s3cchan->slave)
+		s3c24xx_dma_unmap_buffers(txd);
+
+	s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+	struct s3c24xx_dma_phy *phy = data;
+	struct s3c24xx_dma_chan *s3cchan = phy->serving;
+	struct s3c24xx_txd *txd;
+
+	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+	/*
+	 * Interrupts happen to notify the completion of a transfer and the
+	 * channel should have moved into its stop state already on its own.
+	 * Therefore interrupts on channels not bound to a virtual channel
+	 * should never happen. Nevertheless send a terminate command to the
+	 * channel if the unlikely case happens.
+	 */
+	if (unlikely(!s3cchan)) {
+		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+			phy->id);
+
+		s3c24xx_dma_terminate_phy(phy);
+
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&s3cchan->vc.lock);
+	txd = s3cchan->at;
+	if (txd) {
+		/* when more sg's are in this txd, start the next one */
+		if (!list_is_last(txd->at, &txd->dsg_list)) {
+			txd->at = txd->at->next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		} else {
+			s3cchan->at = NULL;
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * And start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&s3cchan->vc))
+				s3c24xx_dma_start_next_txd(s3cchan);
+			else
+				s3c24xx_dma_phy_free(s3cchan);
+		}
+	}
+	spin_unlock(&s3cchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		ret = s3c24xx_dma_set_runtime_config(s3cchan,
+					      (struct dma_slave_config *)arg);
+		break;
+	case DMA_TERMINATE_ALL:
+		if (!s3cchan->phy && !s3cchan->at) {
+			dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+				s3cchan->id);
+			ret = -EINVAL;
+			break;
+		}
+
+		s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		 /* Mark physical channel as free */
+		if (s3cchan->phy)
+			s3c24xx_dma_phy_free(s3cchan);
+
+		/* Dequeue current job */
+		if (s3cchan->at) {
+			s3c24xx_dma_desc_free(&s3cchan->at->vd);
+			s3cchan->at = NULL;
+		}
+
+		/* Dequeue jobs not yet fired as well */
+		s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	return ret;
+}
+
+static int s3c24xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (!txstate) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	vd = vchan_find_desc(&s3cchan->vc, cookie);
+	if (vd) {
+		/* On the issued list, so hasn't been processed yet */
+		txd = to_s3c24xx_txd(&vd->tx);
+
+		list_for_each_entry(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+	} else {
+		/*
+		 * Currently running, so sum over the pending sg's and
+		 * the currently active one.
+		 */
+		txd = s3cchan->at;
+
+		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+		list_for_each_entry_from(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+
+		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	/*
+	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
+	 */
+	dma_set_residue(txstate, bytes);
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	int src_mod, dest_mod;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %d bytes from %s\n",
+			len, s3cchan->name);
+
+	if ((len & S3C24XX_DCON_TC_MASK) != len) {
+		dev_err(&s3cdma->pdev->dev, "memcpy size %d to large\n", len);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+	if (!dsg) {
+		s3c24xx_dma_free_txd(txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/*
+	 * Determine a suitable transfer width.
+	 * The DMA controller cannot fetch/store information which is not
+	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+	 * an address divisible by 4 - more generally addr % width must be 0.
+	 */
+	src_mod = src % 4;
+	dest_mod = dest % 4;
+	switch (len % 4) {
+	case 0:
+		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+		break;
+	case 2:
+		txd->width = ((src_mod == 2 || src_mod == 0) &&
+			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+		break;
+	default:
+		txd->width = 1;
+		break;
+	}
+
+	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+		     S3C24XX_DCON_SERV_WHOLE;
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int tmp;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+			sg_dma_len(sgl), s3cchan->name);
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral desintation is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	} else {
+		s3c24xx_dma_free_txd(txd);
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_dma_address(sg);
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_dma_address(sg);
+		}
+		break;
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	if (vchan_issue_pending(&s3cchan->vc)) {
+		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+			s3c24xx_dma_phy_alloc_and_start(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct s3c24xx_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+		if (!chan) {
+			dev_err(dmadev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		chan->id = i;
+		chan->host = s3cdma;
+		chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		} else {
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		}
+		dev_dbg(dmadev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = s3c24xx_dma_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct s3c24xx_dma_chan *chan = NULL;
+	struct s3c24xx_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node)
+		list_del(&chan->vc.chan.device_node);
+}
+
+/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */
+static struct soc_data soc_s3c2410 = {
+	.stride = 0x40,
+	.has_reqsel = false,
+	.has_clocks = false,
+};
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+	.stride = 0x40,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+	.stride = 0x100,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+static struct platform_device_id s3c24xx_dma_driver_ids[] = {
+	{
+		.name		= "s3c2410-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2410,
+	}, {
+		.name		= "s3c2412-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
+	}, {
+		.name		= "s3c2443-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
+	},
+	{ },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+	return (struct soc_data *)
+			 platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma;
+	struct soc_data *sdata;
+	struct resource *res;
+	int ret;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	/* Basic sanity check */
+	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+		dev_err(&pdev->dev, "to many dma channels %d, max %d\n",
+			pdata->num_phy_channels, MAX_DMA_CHANNELS);
+		return -EINVAL;
+	}
+
+	sdata = s3c24xx_dma_get_soc_data(pdev);
+	if (!sdata)
+		return -EINVAL;
+
+	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+	if (!s3cdma)
+		return -ENOMEM;
+
+	s3cdma->pdev = pdev;
+	s3cdma->pdata = pdata;
+	s3cdma->sdata = sdata;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(s3cdma->base))
+		return PTR_ERR(s3cdma->base);
+
+	s3cdma->phy_chans = devm_kzalloc(&pdev->dev,
+					      sizeof(struct s3c24xx_dma_phy) *
+							pdata->num_phy_channels,
+					      GFP_KERNEL);
+	if (!s3cdma->phy_chans)
+		return -ENOMEM;
+
+	/* aquire irqs and clocks for all physical channels */
+	for (i = 0; i < pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+		char clk_name[6];
+
+		phy->id = i;
+		phy->base = s3cdma->base + (i * sdata->stride);
+		phy->host = s3cdma;
+
+		phy->irq = platform_get_irq(pdev, i);
+		if (phy->irq < 0) {
+			dev_err(&pdev->dev, "failed to get irq %d, err %d\n",
+				i, phy->irq);
+			continue;
+		}
+
+		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+				       0, pdev->name, phy);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+				i, ret);
+			continue;
+		}
+
+		if (sdata->has_clocks) {
+			sprintf(clk_name, "dma.%d", i);
+			phy->clk = devm_clk_get(&pdev->dev, clk_name);
+			if (IS_ERR(phy->clk) && sdata->has_clocks) {
+				dev_err(&pdev->dev, "unable to aquire clock for channel %d, error %lu",
+					i, PTR_ERR(phy->clk));
+				continue;
+			}
+
+			ret = clk_prepare(phy->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+					i, ret);
+				continue;
+			}
+		}
+
+		spin_lock_init(&phy->lock);
+		phy->valid = true;
+
+		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+	s3cdma->memcpy.dev = &pdev->dev;
+	s3cdma->memcpy.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->memcpy.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->memcpy.device_control = s3c24xx_dma_control;
+
+	/* Initialize slave engine for SoC internal dedicated peripherals */
+	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+	s3cdma->slave.dev = &pdev->dev;
+	s3cdma->slave.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->slave.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_control = s3c24xx_dma_control;
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+						pdata->num_phy_channels, false);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto err_memcpy;
+	}
+
+	/* Register slave channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+				pdata->num_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto err_slave;
+	}
+
+	ret = dma_async_device_register(&s3cdma->memcpy);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto err_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&s3cdma->slave);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, s3cdma);
+	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+		 pdata->num_phy_channels);
+
+	return 0;
+
+err_slave_reg:
+	dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return ret;
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+	int i;
+
+	dma_async_device_unregister(&s3cdma->slave);
+	dma_async_device_unregister(&s3cdma->memcpy);
+
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+	.driver		= {
+		.name	= "s3c24xx-dma",
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= s3c24xx_dma_driver_ids,
+	.probe		= s3c24xx_dma_probe,
+	.remove		= s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct s3c24xx_dma_chan *s3cchan;
+
+	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+		return false;
+
+	s3cchan = to_s3c24xx_dma_chan(chan);
+
+	return s3cchan->id == (int)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 17df6db5dca7..8847adf392b7 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -15,8 +15,9 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
-
-#include <asm/mach/irq.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 struct davinci_gpio_regs {
 	u32	dir;
@@ -31,13 +32,14 @@ struct davinci_gpio_regs {
 	u32	intstat;
 };
 
+#define BINTEN	0x8 /* GPIO Interrupt Per-Bank Enable Register */
+
 #define chip2controller(chip)	\
 	container_of(chip, struct davinci_gpio_controller, chip)
 
-static struct davinci_gpio_controller chips[DIV_ROUND_UP(DAVINCI_N_GPIO, 32)];
 static void __iomem *gpio_base;
 
-static struct davinci_gpio_regs __iomem __init *gpio2regs(unsigned gpio)
+static struct davinci_gpio_regs __iomem *gpio2regs(unsigned gpio)
 {
 	void __iomem *ptr;
 
@@ -65,7 +67,7 @@ static inline struct davinci_gpio_regs __iomem *irq2regs(int irq)
 	return g;
 }
 
-static int __init davinci_gpio_irq_setup(void);
+static int davinci_gpio_irq_setup(struct platform_device *pdev);
 
 /*--------------------------------------------------------------------------*/
 
@@ -131,33 +133,53 @@ davinci_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 	__raw_writel((1 << offset), value ? &g->set_data : &g->clr_data);
 }
 
-static int __init davinci_gpio_setup(void)
+static int davinci_gpio_probe(struct platform_device *pdev)
 {
 	int i, base;
 	unsigned ngpio;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-	struct davinci_gpio_regs *regs;
+	struct davinci_gpio_controller *chips;
+	struct davinci_gpio_platform_data *pdata;
+	struct davinci_gpio_regs __iomem *regs;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
 
-	if (soc_info->gpio_type != GPIO_TYPE_DAVINCI)
-		return 0;
+	pdata = dev->platform_data;
+	if (!pdata) {
+		dev_err(dev, "No platform data found\n");
+		return -EINVAL;
+	}
 
 	/*
 	 * The gpio banks conceptually expose a segmented bitmap,
 	 * and "ngpio" is one more than the largest zero-based
 	 * bit index that's valid.
 	 */
-	ngpio = soc_info->gpio_num;
+	ngpio = pdata->ngpio;
 	if (ngpio == 0) {
-		pr_err("GPIO setup:  how many GPIOs?\n");
+		dev_err(dev, "How many GPIOs?\n");
 		return -EINVAL;
 	}
 
 	if (WARN_ON(DAVINCI_N_GPIO < ngpio))
 		ngpio = DAVINCI_N_GPIO;
 
-	gpio_base = ioremap(soc_info->gpio_base, SZ_4K);
-	if (WARN_ON(!gpio_base))
+	chips = devm_kzalloc(dev,
+			     ngpio * sizeof(struct davinci_gpio_controller),
+			     GFP_KERNEL);
+	if (!chips) {
+		dev_err(dev, "Memory allocation failed\n");
 		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Invalid memory resource\n");
+		return -EBUSY;
+	}
+
+	gpio_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(gpio_base))
+		return PTR_ERR(gpio_base);
 
 	for (i = 0, base = 0; base < ngpio; i++, base += 32) {
 		chips[i].chip.label = "DaVinci";
@@ -183,13 +205,10 @@ static int __init davinci_gpio_setup(void)
 		gpiochip_add(&chips[i].chip);
 	}
 
-	soc_info->gpio_ctlrs = chips;
-	soc_info->gpio_ctlrs_num = DIV_ROUND_UP(ngpio, 32);
-
-	davinci_gpio_irq_setup();
+	platform_set_drvdata(pdev, chips);
+	davinci_gpio_irq_setup(pdev);
 	return 0;
 }
-pure_initcall(davinci_gpio_setup);
 
 /*--------------------------------------------------------------------------*/
 /*
@@ -302,13 +321,14 @@ static int gpio_to_irq_banked(struct gpio_chip *chip, unsigned offset)
 
 static int gpio_to_irq_unbanked(struct gpio_chip *chip, unsigned offset)
 {
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	struct davinci_gpio_controller *d = chip2controller(chip);
 
-	/* NOTE:  we assume for now that only irqs in the first gpio_chip
+	/*
+	 * NOTE:  we assume for now that only irqs in the first gpio_chip
 	 * can provide direct-mapped IRQs to AINTC (up to 32 GPIOs).
 	 */
-	if (offset < soc_info->gpio_unbanked)
-		return soc_info->gpio_irq + offset;
+	if (offset < d->irq_base)
+		return d->gpio_irq + offset;
 	else
 		return -ENODEV;
 }
@@ -317,12 +337,11 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
 {
 	struct davinci_gpio_controller *d;
 	struct davinci_gpio_regs __iomem *g;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
 	u32 mask;
 
 	d = (struct davinci_gpio_controller *)data->handler_data;
 	g = (struct davinci_gpio_regs __iomem *)d->regs;
-	mask = __gpio_mask(data->irq - soc_info->gpio_irq);
+	mask = __gpio_mask(data->irq - d->gpio_irq);
 
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
@@ -343,24 +362,33 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
  * (dm6446) can be set appropriately for GPIOV33 pins.
  */
 
-static int __init davinci_gpio_irq_setup(void)
+static int davinci_gpio_irq_setup(struct platform_device *pdev)
 {
 	unsigned	gpio, irq, bank;
 	struct clk	*clk;
 	u32		binten = 0;
 	unsigned	ngpio, bank_irq;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-	struct davinci_gpio_regs	__iomem *g;
+	struct device *dev = &pdev->dev;
+	struct resource	*res;
+	struct davinci_gpio_controller *chips = platform_get_drvdata(pdev);
+	struct davinci_gpio_platform_data *pdata = dev->platform_data;
+	struct davinci_gpio_regs __iomem *g;
 
-	ngpio = soc_info->gpio_num;
-
-	bank_irq = soc_info->gpio_irq;
-	if (bank_irq == 0) {
-		printk(KERN_ERR "Don't know first GPIO bank IRQ.\n");
-		return -EINVAL;
+	ngpio = pdata->ngpio;
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "Invalid IRQ resource\n");
+		return -EBUSY;
 	}
 
-	clk = clk_get(NULL, "gpio");
+	bank_irq = res->start;
+
+	if (!bank_irq) {
+		dev_err(dev, "Invalid IRQ resource\n");
+		return -ENODEV;
+	}
+
+	clk = devm_clk_get(dev, "gpio");
 	if (IS_ERR(clk)) {
 		printk(KERN_ERR "Error %ld getting gpio clock?\n",
 		       PTR_ERR(clk));
@@ -368,16 +396,17 @@ static int __init davinci_gpio_irq_setup(void)
 	}
 	clk_prepare_enable(clk);
 
-	/* Arrange gpio_to_irq() support, handling either direct IRQs or
+	/*
+	 * Arrange gpio_to_irq() support, handling either direct IRQs or
 	 * banked IRQs.  Having GPIOs in the first GPIO bank use direct
 	 * IRQs, while the others use banked IRQs, would need some setup
 	 * tweaks to recognize hardware which can do that.
 	 */
 	for (gpio = 0, bank = 0; gpio < ngpio; bank++, gpio += 32) {
 		chips[bank].chip.to_irq = gpio_to_irq_banked;
-		chips[bank].irq_base = soc_info->gpio_unbanked
+		chips[bank].irq_base = pdata->gpio_unbanked
 			? -EINVAL
-			: (soc_info->intc_irq_num + gpio);
+			: (pdata->intc_irq_num + gpio);
 	}
 
 	/*
@@ -385,7 +414,7 @@ static int __init davinci_gpio_irq_setup(void)
 	 * controller only handling trigger modes.  We currently assume no
 	 * IRQ mux conflicts; gpio_irq_type_unbanked() is only for GPIOs.
 	 */
-	if (soc_info->gpio_unbanked) {
+	if (pdata->gpio_unbanked) {
 		static struct irq_chip_type gpio_unbanked;
 
 		/* pass "bank 0" GPIO IRQs to AINTC */
@@ -405,7 +434,7 @@ static int __init davinci_gpio_irq_setup(void)
 		__raw_writel(~0, &g->set_rising);
 
 		/* set the direct IRQs up to use that irqchip */
-		for (gpio = 0; gpio < soc_info->gpio_unbanked; gpio++, irq++) {
+		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++, irq++) {
 			irq_set_chip(irq, &gpio_unbanked.chip);
 			irq_set_handler_data(irq, &chips[gpio / 32]);
 			irq_set_status_flags(irq, IRQ_TYPE_EDGE_BOTH);
@@ -450,12 +479,31 @@ static int __init davinci_gpio_irq_setup(void)
 	}
 
 done:
-	/* BINTEN -- per-bank interrupt enable. genirq would also let these
+	/*
+	 * BINTEN -- per-bank interrupt enable. genirq would also let these
 	 * bits be set/cleared dynamically.
 	 */
-	__raw_writel(binten, gpio_base + 0x08);
+	__raw_writel(binten, gpio_base + BINTEN);
 
 	printk(KERN_INFO "DaVinci: %d gpio irqs\n", irq - gpio_to_irq(0));
 
 	return 0;
 }
+
+static struct platform_driver davinci_gpio_driver = {
+	.probe		= davinci_gpio_probe,
+	.driver		= {
+		.name	= "davinci_gpio",
+		.owner	= THIS_MODULE,
+	},
+};
+
+/**
+ * GPIO driver registration needs to be done before machine_init functions
+ * access GPIO. Hence davinci_gpio_drv_reg() is a postcore_initcall.
+ */
+static int __init davinci_gpio_drv_reg(void)
+{
+	return platform_driver_register(&davinci_gpio_driver);
+}
+postcore_initcall(davinci_gpio_drv_reg);
diff --git a/drivers/gpio/gpio-tnetv107x.c b/drivers/gpio/gpio-tnetv107x.c
index 3fa3e2867e19..58445bb69106 100644
--- a/drivers/gpio/gpio-tnetv107x.c
+++ b/drivers/gpio/gpio-tnetv107x.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/platform_data/gpio-davinci.h>
 
 #include <mach/common.h>
 #include <mach/tnetv107x.h>
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index bb328a366122..433cc8568dec 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -21,7 +21,10 @@
 #include <linux/io.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/of_pci.h>
 #include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/msi.h>
 #include <asm/mach/arch.h>
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
@@ -51,12 +54,22 @@
 #define IPI_DOORBELL_START                      (0)
 #define IPI_DOORBELL_END                        (8)
 #define IPI_DOORBELL_MASK                       0xFF
+#define PCI_MSI_DOORBELL_START                  (16)
+#define PCI_MSI_DOORBELL_NR                     (16)
+#define PCI_MSI_DOORBELL_END                    (32)
+#define PCI_MSI_DOORBELL_MASK                   0xFFFF0000
 
 static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
 static void __iomem *per_cpu_int_base;
 static void __iomem *main_int_base;
 static struct irq_domain *armada_370_xp_mpic_domain;
+#ifdef CONFIG_PCI_MSI
+static struct irq_domain *armada_370_xp_msi_domain;
+static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR);
+static DEFINE_MUTEX(msi_used_lock);
+static phys_addr_t msi_doorbell_addr;
+#endif
 
 /*
  * In SMP mode:
@@ -87,6 +100,144 @@ static void armada_370_xp_irq_unmask(struct irq_data *d)
 				ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
 }
 
+#ifdef CONFIG_PCI_MSI
+
+static int armada_370_xp_alloc_msi(void)
+{
+	int hwirq;
+
+	mutex_lock(&msi_used_lock);
+	hwirq = find_first_zero_bit(&msi_used, PCI_MSI_DOORBELL_NR);
+	if (hwirq >= PCI_MSI_DOORBELL_NR)
+		hwirq = -ENOSPC;
+	else
+		set_bit(hwirq, msi_used);
+	mutex_unlock(&msi_used_lock);
+
+	return hwirq;
+}
+
+static void armada_370_xp_free_msi(int hwirq)
+{
+	mutex_lock(&msi_used_lock);
+	if (!test_bit(hwirq, msi_used))
+		pr_err("trying to free unused MSI#%d\n", hwirq);
+	else
+		clear_bit(hwirq, msi_used);
+	mutex_unlock(&msi_used_lock);
+}
+
+static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
+				       struct pci_dev *pdev,
+				       struct msi_desc *desc)
+{
+	struct msi_msg msg;
+	irq_hw_number_t hwirq;
+	int virq;
+
+	hwirq = armada_370_xp_alloc_msi();
+	if (hwirq < 0)
+		return hwirq;
+
+	virq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
+	if (!virq) {
+		armada_370_xp_free_msi(hwirq);
+		return -EINVAL;
+	}
+
+	irq_set_msi_desc(virq, desc);
+
+	msg.address_lo = msi_doorbell_addr;
+	msg.address_hi = 0;
+	msg.data = 0xf00 | (hwirq + 16);
+
+	write_msi_msg(virq, &msg);
+	return 0;
+}
+
+static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip,
+					   unsigned int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	irq_dispose_mapping(irq);
+	armada_370_xp_free_msi(d->hwirq);
+}
+
+static struct irq_chip armada_370_xp_msi_irq_chip = {
+	.name = "armada_370_xp_msi_irq",
+	.irq_enable = unmask_msi_irq,
+	.irq_disable = mask_msi_irq,
+	.irq_mask = mask_msi_irq,
+	.irq_unmask = unmask_msi_irq,
+};
+
+static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq,
+				 irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip,
+				 handle_simple_irq);
+	set_irq_flags(virq, IRQF_VALID);
+
+	return 0;
+}
+
+static const struct irq_domain_ops armada_370_xp_msi_irq_ops = {
+	.map = armada_370_xp_msi_map,
+};
+
+static int armada_370_xp_msi_init(struct device_node *node,
+				  phys_addr_t main_int_phys_base)
+{
+	struct msi_chip *msi_chip;
+	u32 reg;
+	int ret;
+
+	msi_doorbell_addr = main_int_phys_base +
+		ARMADA_370_XP_SW_TRIG_INT_OFFS;
+
+	msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL);
+	if (!msi_chip)
+		return -ENOMEM;
+
+	msi_chip->setup_irq = armada_370_xp_setup_msi_irq;
+	msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq;
+	msi_chip->of_node = node;
+
+	armada_370_xp_msi_domain =
+		irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR,
+				      &armada_370_xp_msi_irq_ops,
+				      NULL);
+	if (!armada_370_xp_msi_domain) {
+		kfree(msi_chip);
+		return -ENOMEM;
+	}
+
+	ret = of_pci_msi_chip_add(msi_chip);
+	if (ret < 0) {
+		irq_domain_remove(armada_370_xp_msi_domain);
+		kfree(msi_chip);
+		return ret;
+	}
+
+	reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
+		| PCI_MSI_DOORBELL_MASK;
+
+	writel(reg, per_cpu_int_base +
+	       ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
+
+	/* Unmask IPI interrupt */
+	writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+
+	return 0;
+}
+#else
+static inline int armada_370_xp_msi_init(struct device_node *node,
+					 phys_addr_t main_int_phys_base)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static int armada_xp_set_affinity(struct irq_data *d,
 				  const struct cpumask *mask_val, bool force)
@@ -214,12 +365,39 @@ armada_370_xp_handle_irq(struct pt_regs *regs)
 		if (irqnr > 1022)
 			break;
 
-		if (irqnr > 0) {
+		if (irqnr > 1) {
 			irqnr =	irq_find_mapping(armada_370_xp_mpic_domain,
 					irqnr);
 			handle_IRQ(irqnr, regs);
 			continue;
 		}
+
+#ifdef CONFIG_PCI_MSI
+		/* MSI handling */
+		if (irqnr == 1) {
+			u32 msimask, msinr;
+
+			msimask = readl_relaxed(per_cpu_int_base +
+						ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS)
+				& PCI_MSI_DOORBELL_MASK;
+
+			writel(~PCI_MSI_DOORBELL_MASK, per_cpu_int_base +
+			       ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS);
+
+			for (msinr = PCI_MSI_DOORBELL_START;
+			     msinr < PCI_MSI_DOORBELL_END; msinr++) {
+				int irq;
+
+				if (!(msimask & BIT(msinr)))
+					continue;
+
+				irq = irq_find_mapping(armada_370_xp_msi_domain,
+						       msinr - 16);
+				handle_IRQ(irq, regs);
+			}
+		}
+#endif
+
 #ifdef CONFIG_SMP
 		/* IPI Handling */
 		if (irqnr == 0) {
@@ -248,12 +426,25 @@ armada_370_xp_handle_irq(struct pt_regs *regs)
 static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 					     struct device_node *parent)
 {
+	struct resource main_int_res, per_cpu_int_res;
 	u32 control;
 
-	main_int_base = of_iomap(node, 0);
-	per_cpu_int_base = of_iomap(node, 1);
+	BUG_ON(of_address_to_resource(node, 0, &main_int_res));
+	BUG_ON(of_address_to_resource(node, 1, &per_cpu_int_res));
 
+	BUG_ON(!request_mem_region(main_int_res.start,
+				   resource_size(&main_int_res),
+				   node->full_name));
+	BUG_ON(!request_mem_region(per_cpu_int_res.start,
+				   resource_size(&per_cpu_int_res),
+				   node->full_name));
+
+	main_int_base = ioremap(main_int_res.start,
+				resource_size(&main_int_res));
 	BUG_ON(!main_int_base);
+
+	per_cpu_int_base = ioremap(per_cpu_int_res.start,
+				   resource_size(&per_cpu_int_res));
 	BUG_ON(!per_cpu_int_base);
 
 	control = readl(main_int_base + ARMADA_370_XP_INT_CONTROL);
@@ -262,8 +453,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 		irq_domain_add_linear(node, (control >> 2) & 0x3ff,
 				&armada_370_xp_mpic_irq_ops, NULL);
 
-	if (!armada_370_xp_mpic_domain)
-		panic("Unable to add Armada_370_Xp MPIC irq domain (DT)\n");
+	BUG_ON(!armada_370_xp_mpic_domain);
 
 	irq_set_default_host(armada_370_xp_mpic_domain);
 
@@ -280,6 +470,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 
 #endif
 
+	armada_370_xp_msi_init(node, main_int_res.start);
+
 	set_handle_irq(armada_370_xp_handle_irq);
 
 	return 0;
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 3d9504811126..43186feb4294 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -3,7 +3,7 @@ menu "PCI host controller drivers"
 
 config PCI_MVEBU
 	bool "Marvell EBU PCIe controller"
-	depends on ARCH_MVEBU || ARCH_KIRKWOOD
+	depends on ARCH_MVEBU || ARCH_DOVE || ARCH_KIRKWOOD
 	depends on OF
 
 config PCIE_DW
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 729d5a101d62..80b2250ea19a 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -9,13 +9,17 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/mbus.h>
+#include <linux/msi.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
-#include <linux/of_pci.h>
 #include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+#include <linux/of_pci.h>
 #include <linux/of_platform.h>
 
 /*
@@ -103,6 +107,7 @@ struct mvebu_pcie_port;
 struct mvebu_pcie {
 	struct platform_device *pdev;
 	struct mvebu_pcie_port *ports;
+	struct msi_chip *msi;
 	struct resource io;
 	struct resource realio;
 	struct resource mem;
@@ -115,7 +120,6 @@ struct mvebu_pcie_port {
 	char *name;
 	void __iomem *base;
 	spinlock_t conf_lock;
-	int haslink;
 	u32 port;
 	u32 lane;
 	int devfn;
@@ -124,6 +128,9 @@ struct mvebu_pcie_port {
 	unsigned int io_target;
 	unsigned int io_attr;
 	struct clk *clk;
+	int reset_gpio;
+	int reset_active_low;
+	char *reset_name;
 	struct mvebu_sw_pci_bridge bridge;
 	struct device_node *dn;
 	struct mvebu_pcie *pcie;
@@ -133,29 +140,39 @@ struct mvebu_pcie_port {
 	size_t iowin_size;
 };
 
+static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
+{
+	writel(val, port->base + reg);
+}
+
+static inline u32 mvebu_readl(struct mvebu_pcie_port *port, u32 reg)
+{
+	return readl(port->base + reg);
+}
+
 static bool mvebu_pcie_link_up(struct mvebu_pcie_port *port)
 {
-	return !(readl(port->base + PCIE_STAT_OFF) & PCIE_STAT_LINK_DOWN);
+	return !(mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_LINK_DOWN);
 }
 
 static void mvebu_pcie_set_local_bus_nr(struct mvebu_pcie_port *port, int nr)
 {
 	u32 stat;
 
-	stat = readl(port->base + PCIE_STAT_OFF);
+	stat = mvebu_readl(port, PCIE_STAT_OFF);
 	stat &= ~PCIE_STAT_BUS;
 	stat |= nr << 8;
-	writel(stat, port->base + PCIE_STAT_OFF);
+	mvebu_writel(port, stat, PCIE_STAT_OFF);
 }
 
 static void mvebu_pcie_set_local_dev_nr(struct mvebu_pcie_port *port, int nr)
 {
 	u32 stat;
 
-	stat = readl(port->base + PCIE_STAT_OFF);
+	stat = mvebu_readl(port, PCIE_STAT_OFF);
 	stat &= ~PCIE_STAT_DEV;
 	stat |= nr << 16;
-	writel(stat, port->base + PCIE_STAT_OFF);
+	mvebu_writel(port, stat, PCIE_STAT_OFF);
 }
 
 /*
@@ -163,7 +180,7 @@ static void mvebu_pcie_set_local_dev_nr(struct mvebu_pcie_port *port, int nr)
  * BAR[0,2] -> disabled, BAR[1] -> covers all DRAM banks
  * WIN[0-3] -> DRAM bank[0-3]
  */
-static void __init mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
+static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
 {
 	const struct mbus_dram_target_info *dram;
 	u32 size;
@@ -173,33 +190,34 @@ static void __init mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
 
 	/* First, disable and clear BARs and windows. */
 	for (i = 1; i < 3; i++) {
-		writel(0, port->base + PCIE_BAR_CTRL_OFF(i));
-		writel(0, port->base + PCIE_BAR_LO_OFF(i));
-		writel(0, port->base + PCIE_BAR_HI_OFF(i));
+		mvebu_writel(port, 0, PCIE_BAR_CTRL_OFF(i));
+		mvebu_writel(port, 0, PCIE_BAR_LO_OFF(i));
+		mvebu_writel(port, 0, PCIE_BAR_HI_OFF(i));
 	}
 
 	for (i = 0; i < 5; i++) {
-		writel(0, port->base + PCIE_WIN04_CTRL_OFF(i));
-		writel(0, port->base + PCIE_WIN04_BASE_OFF(i));
-		writel(0, port->base + PCIE_WIN04_REMAP_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_CTRL_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_BASE_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_REMAP_OFF(i));
 	}
 
-	writel(0, port->base + PCIE_WIN5_CTRL_OFF);
-	writel(0, port->base + PCIE_WIN5_BASE_OFF);
-	writel(0, port->base + PCIE_WIN5_REMAP_OFF);
+	mvebu_writel(port, 0, PCIE_WIN5_CTRL_OFF);
+	mvebu_writel(port, 0, PCIE_WIN5_BASE_OFF);
+	mvebu_writel(port, 0, PCIE_WIN5_REMAP_OFF);
 
 	/* Setup windows for DDR banks.  Count total DDR size on the fly. */
 	size = 0;
 	for (i = 0; i < dram->num_cs; i++) {
 		const struct mbus_dram_window *cs = dram->cs + i;
 
-		writel(cs->base & 0xffff0000,
-		       port->base + PCIE_WIN04_BASE_OFF(i));
-		writel(0, port->base + PCIE_WIN04_REMAP_OFF(i));
-		writel(((cs->size - 1) & 0xffff0000) |
-			(cs->mbus_attr << 8) |
-			(dram->mbus_dram_target_id << 4) | 1,
-		       port->base + PCIE_WIN04_CTRL_OFF(i));
+		mvebu_writel(port, cs->base & 0xffff0000,
+			     PCIE_WIN04_BASE_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_REMAP_OFF(i));
+		mvebu_writel(port,
+			     ((cs->size - 1) & 0xffff0000) |
+			     (cs->mbus_attr << 8) |
+			     (dram->mbus_dram_target_id << 4) | 1,
+			     PCIE_WIN04_CTRL_OFF(i));
 
 		size += cs->size;
 	}
@@ -209,41 +227,40 @@ static void __init mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
 		size = 1 << fls(size);
 
 	/* Setup BAR[1] to all DRAM banks. */
-	writel(dram->cs[0].base, port->base + PCIE_BAR_LO_OFF(1));
-	writel(0, port->base + PCIE_BAR_HI_OFF(1));
-	writel(((size - 1) & 0xffff0000) | 1,
-	       port->base + PCIE_BAR_CTRL_OFF(1));
+	mvebu_writel(port, dram->cs[0].base, PCIE_BAR_LO_OFF(1));
+	mvebu_writel(port, 0, PCIE_BAR_HI_OFF(1));
+	mvebu_writel(port, ((size - 1) & 0xffff0000) | 1,
+		     PCIE_BAR_CTRL_OFF(1));
 }
 
-static void __init mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
+static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
 {
-	u16 cmd;
-	u32 mask;
+	u32 cmd, mask;
 
 	/* Point PCIe unit MBUS decode windows to DRAM space. */
 	mvebu_pcie_setup_wins(port);
 
 	/* Master + slave enable. */
-	cmd = readw(port->base + PCIE_CMD_OFF);
+	cmd = mvebu_readl(port, PCIE_CMD_OFF);
 	cmd |= PCI_COMMAND_IO;
 	cmd |= PCI_COMMAND_MEMORY;
 	cmd |= PCI_COMMAND_MASTER;
-	writew(cmd, port->base + PCIE_CMD_OFF);
+	mvebu_writel(port, cmd, PCIE_CMD_OFF);
 
 	/* Enable interrupt lines A-D. */
-	mask = readl(port->base + PCIE_MASK_OFF);
+	mask = mvebu_readl(port, PCIE_MASK_OFF);
 	mask |= PCIE_MASK_ENABLE_INTS;
-	writel(mask, port->base + PCIE_MASK_OFF);
+	mvebu_writel(port, mask, PCIE_MASK_OFF);
 }
 
 static int mvebu_pcie_hw_rd_conf(struct mvebu_pcie_port *port,
 				 struct pci_bus *bus,
 				 u32 devfn, int where, int size, u32 *val)
 {
-	writel(PCIE_CONF_ADDR(bus->number, devfn, where),
-	       port->base + PCIE_CONF_ADDR_OFF);
+	mvebu_writel(port, PCIE_CONF_ADDR(bus->number, devfn, where),
+		     PCIE_CONF_ADDR_OFF);
 
-	*val = readl(port->base + PCIE_CONF_DATA_OFF);
+	*val = mvebu_readl(port, PCIE_CONF_DATA_OFF);
 
 	if (size == 1)
 		*val = (*val >> (8 * (where & 3))) & 0xff;
@@ -257,21 +274,24 @@ static int mvebu_pcie_hw_wr_conf(struct mvebu_pcie_port *port,
 				 struct pci_bus *bus,
 				 u32 devfn, int where, int size, u32 val)
 {
-	int ret = PCIBIOS_SUCCESSFUL;
+	u32 _val, shift = 8 * (where & 3);
 
-	writel(PCIE_CONF_ADDR(bus->number, devfn, where),
-	       port->base + PCIE_CONF_ADDR_OFF);
+	mvebu_writel(port, PCIE_CONF_ADDR(bus->number, devfn, where),
+		     PCIE_CONF_ADDR_OFF);
+	_val = mvebu_readl(port, PCIE_CONF_DATA_OFF);
 
 	if (size == 4)
-		writel(val, port->base + PCIE_CONF_DATA_OFF);
+		_val = val;
 	else if (size == 2)
-		writew(val, port->base + PCIE_CONF_DATA_OFF + (where & 3));
+		_val = (_val & ~(0xffff << shift)) | ((val & 0xffff) << shift);
 	else if (size == 1)
-		writeb(val, port->base + PCIE_CONF_DATA_OFF + (where & 3));
+		_val = (_val & ~(0xff << shift)) | ((val & 0xff) << shift);
 	else
-		ret = PCIBIOS_BAD_REGISTER_NUMBER;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
 
-	return ret;
+	mvebu_writel(port, _val, PCIE_CONF_DATA_OFF);
+
+	return PCIBIOS_SUCCESSFUL;
 }
 
 static void mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
@@ -552,7 +572,7 @@ static int mvebu_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	if (bus->number == 0)
 		return mvebu_sw_pci_bridge_write(port, where, size, val);
 
-	if (!port->haslink)
+	if (!mvebu_pcie_link_up(port))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	/*
@@ -594,7 +614,7 @@ static int mvebu_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
 	if (bus->number == 0)
 		return mvebu_sw_pci_bridge_read(port, where, size, val);
 
-	if (!port->haslink) {
+	if (!mvebu_pcie_link_up(port)) {
 		*val = 0xffffffff;
 		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
@@ -626,7 +646,7 @@ static struct pci_ops mvebu_pcie_ops = {
 	.write = mvebu_pcie_wr_conf,
 };
 
-static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
+static int mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
 {
 	struct mvebu_pcie *pcie = sys_to_pcie(sys);
 	int i;
@@ -645,7 +665,7 @@ static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
 	return 1;
 }
 
-static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct of_irq oirq;
 	int ret;
@@ -673,11 +693,17 @@ static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 	return bus;
 }
 
-resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
-					  const struct resource *res,
-					  resource_size_t start,
-					  resource_size_t size,
-					  resource_size_t align)
+static void mvebu_pcie_add_bus(struct pci_bus *bus)
+{
+	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
+	bus->msi = pcie->msi;
+}
+
+static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
+						const struct resource *res,
+						resource_size_t start,
+						resource_size_t size,
+						resource_size_t align)
 {
 	if (dev->bus->number != 0)
 		return start;
@@ -696,7 +722,7 @@ resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
 		return start;
 }
 
-static void __init mvebu_pcie_enable(struct mvebu_pcie *pcie)
+static void mvebu_pcie_enable(struct mvebu_pcie *pcie)
 {
 	struct hw_pci hw;
 
@@ -709,6 +735,7 @@ static void __init mvebu_pcie_enable(struct mvebu_pcie *pcie)
 	hw.map_irq        = mvebu_pcie_map_irq;
 	hw.ops            = &mvebu_pcie_ops;
 	hw.align_resource = mvebu_pcie_align_resource;
+	hw.add_bus        = mvebu_pcie_add_bus;
 
 	pci_common_init(&hw);
 }
@@ -718,10 +745,8 @@ static void __init mvebu_pcie_enable(struct mvebu_pcie *pcie)
  * <...> property for one that matches the given port/lane. Once
  * found, maps it.
  */
-static void __iomem * __init
-mvebu_pcie_map_registers(struct platform_device *pdev,
-			 struct device_node *np,
-			 struct mvebu_pcie_port *port)
+static void __iomem *mvebu_pcie_map_registers(struct platform_device *pdev,
+		      struct device_node *np, struct mvebu_pcie_port *port)
 {
 	struct resource regs;
 	int ret = 0;
@@ -777,7 +802,22 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn,
 	return -ENOENT;
 }
 
-static int __init mvebu_pcie_probe(struct platform_device *pdev)
+static void mvebu_pcie_msi_enable(struct mvebu_pcie *pcie)
+{
+	struct device_node *msi_node;
+
+	msi_node = of_parse_phandle(pcie->pdev->dev.of_node,
+				    "msi-parent", 0);
+	if (!msi_node)
+		return;
+
+	pcie->msi = of_pci_find_msi_chip_by_node(msi_node);
+
+	if (pcie->msi)
+		pcie->msi->dev = &pcie->pdev->dev;
+}
+
+static int mvebu_pcie_probe(struct platform_device *pdev)
 {
 	struct mvebu_pcie *pcie;
 	struct device_node *np = pdev->dev.of_node;
@@ -790,6 +830,7 @@ static int __init mvebu_pcie_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	pcie->pdev = pdev;
+	platform_set_drvdata(pdev, pcie);
 
 	/* Get the PCIe memory and I/O aperture */
 	mvebu_mbus_get_pcie_mem_aperture(&pcie->mem);
@@ -818,13 +859,14 @@ static int __init mvebu_pcie_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	i = 0;
 	for_each_child_of_node(pdev->dev.of_node, child) {
 		if (!of_device_is_available(child))
 			continue;
-		pcie->nports++;
+		i++;
 	}
 
-	pcie->ports = devm_kzalloc(&pdev->dev, pcie->nports *
+	pcie->ports = devm_kzalloc(&pdev->dev, i *
 				   sizeof(struct mvebu_pcie_port),
 				   GFP_KERNEL);
 	if (!pcie->ports)
@@ -833,6 +875,7 @@ static int __init mvebu_pcie_probe(struct platform_device *pdev)
 	i = 0;
 	for_each_child_of_node(pdev->dev.of_node, child) {
 		struct mvebu_pcie_port *port = &pcie->ports[i];
+		enum of_gpio_flags flags;
 
 		if (!of_device_is_available(child))
 			continue;
@@ -873,45 +916,68 @@ static int __init mvebu_pcie_probe(struct platform_device *pdev)
 			continue;
 		}
 
-		port->base = mvebu_pcie_map_registers(pdev, child, port);
-		if (IS_ERR(port->base)) {
-			dev_err(&pdev->dev, "PCIe%d.%d: cannot map registers\n",
-				port->port, port->lane);
-			port->base = NULL;
-			continue;
-		}
+		port->reset_gpio = of_get_named_gpio_flags(child,
+						   "reset-gpios", 0, &flags);
+		if (gpio_is_valid(port->reset_gpio)) {
+			u32 reset_udelay = 20000;
 
-		mvebu_pcie_set_local_dev_nr(port, 1);
+			port->reset_active_low = flags & OF_GPIO_ACTIVE_LOW;
+			port->reset_name = kasprintf(GFP_KERNEL,
+				     "pcie%d.%d-reset", port->port, port->lane);
+			of_property_read_u32(child, "reset-delay-us",
+					     &reset_udelay);
 
-		if (mvebu_pcie_link_up(port)) {
-			port->haslink = 1;
-			dev_info(&pdev->dev, "PCIe%d.%d: link up\n",
-				 port->port, port->lane);
-		} else {
-			port->haslink = 0;
-			dev_info(&pdev->dev, "PCIe%d.%d: link down\n",
-				 port->port, port->lane);
+			ret = devm_gpio_request_one(&pdev->dev,
+			    port->reset_gpio, GPIOF_DIR_OUT, port->reset_name);
+			if (ret) {
+				if (ret == -EPROBE_DEFER)
+					return ret;
+				continue;
+			}
+
+			gpio_set_value(port->reset_gpio,
+				       (port->reset_active_low) ? 1 : 0);
+			msleep(reset_udelay/1000);
 		}
 
+		port->clk = of_clk_get_by_name(child, NULL);
+		if (IS_ERR(port->clk)) {
+			dev_err(&pdev->dev, "PCIe%d.%d: cannot get clock\n",
+			       port->port, port->lane);
+			continue;
+		}
+
+		ret = clk_prepare_enable(port->clk);
+		if (ret)
+			continue;
+
+		port->base = mvebu_pcie_map_registers(pdev, child, port);
+		if (IS_ERR(port->base)) {
+			dev_err(&pdev->dev, "PCIe%d.%d: cannot map registers\n",
+				port->port, port->lane);
+			port->base = NULL;
+			clk_disable_unprepare(port->clk);
+			continue;
+		}
+
+		mvebu_pcie_set_local_dev_nr(port, 1);
+
 		port->clk = of_clk_get_by_name(child, NULL);
 		if (IS_ERR(port->clk)) {
 			dev_err(&pdev->dev, "PCIe%d.%d: cannot get clock\n",
 			       port->port, port->lane);
 			iounmap(port->base);
-			port->haslink = 0;
 			continue;
 		}
 
 		port->dn = child;
-
-		clk_prepare_enable(port->clk);
 		spin_lock_init(&port->conf_lock);
-
 		mvebu_sw_pci_bridge_init(port);
-
 		i++;
 	}
 
+	pcie->nports = i;
+	mvebu_pcie_msi_enable(pcie);
 	mvebu_pcie_enable(pcie);
 
 	return 0;
@@ -920,6 +986,7 @@ static int __init mvebu_pcie_probe(struct platform_device *pdev)
 static const struct of_device_id mvebu_pcie_of_match_table[] = {
 	{ .compatible = "marvell,armada-xp-pcie", },
 	{ .compatible = "marvell,armada-370-pcie", },
+	{ .compatible = "marvell,dove-pcie", },
 	{ .compatible = "marvell,kirkwood-pcie", },
 	{},
 };
@@ -931,16 +998,12 @@ static struct platform_driver mvebu_pcie_driver = {
 		.name = "mvebu-pcie",
 		.of_match_table =
 		   of_match_ptr(mvebu_pcie_of_match_table),
+		/* driver unloading/unbinding currently not supported */
+		.suppress_bind_attrs = true,
 	},
+	.probe = mvebu_pcie_probe,
 };
-
-static int __init mvebu_pcie_init(void)
-{
-	return platform_driver_probe(&mvebu_pcie_driver,
-				     mvebu_pcie_probe);
-}
-
-subsys_initcall(mvebu_pcie_init);
+module_platform_driver(mvebu_pcie_driver);
 
 MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
 MODULE_DESCRIPTION("Marvell EBU PCIe driver");
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
new file mode 100644
index 000000000000..89ba1b0c90e4
--- /dev/null
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -0,0 +1,46 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/* Helper to encode the source selection constraints for early s3c socs. */
+#define S3C24XX_DMA_CHANREQ(src, chan)	((BIT(3) | src) << chan * 4)
+
+enum s3c24xx_dma_bus {
+	S3C24XX_DMA_APB,
+	S3C24XX_DMA_AHB,
+};
+
+/**
+ * @bus: on which bus does the peripheral reside - AHB or APB.
+ * @handshake: is a handshake with the peripheral necessary
+ * @chansel: channel selection information, depending on variant; reqsel for
+ *	     s3c2443 and later and channel-selection map for earlier SoCs
+ *	     see CHANSEL doc in s3c2443-dma.c
+ */
+struct s3c24xx_dma_channel {
+	enum s3c24xx_dma_bus bus;
+	bool handshake;
+	u16 chansel;
+};
+
+/**
+ * struct s3c24xx_dma_platdata - platform specific settings
+ * @num_phy_channels: number of physical channels
+ * @channels: array of virtual channel descriptions
+ * @num_channels: number of virtual channels
+ */
+struct s3c24xx_dma_platdata {
+	int num_phy_channels;
+	struct s3c24xx_dma_channel *channels;
+	int num_channels;
+};
+
+struct dma_chan;
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param);
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
new file mode 100644
index 000000000000..6efd20264585
--- /dev/null
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -0,0 +1,60 @@
+/*
+ * DaVinci GPIO Platform Related Defines
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DAVINCI_GPIO_PLATFORM_H
+#define __DAVINCI_GPIO_PLATFORM_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+#include <asm-generic/gpio.h>
+
+enum davinci_gpio_type {
+	GPIO_TYPE_TNETV107X = 0,
+};
+
+struct davinci_gpio_platform_data {
+	u32	ngpio;
+	u32	gpio_unbanked;
+	u32	intc_irq_num;
+};
+
+
+struct davinci_gpio_controller {
+	struct gpio_chip	chip;
+	int			irq_base;
+	/* Serialize access to GPIO registers */
+	spinlock_t		lock;
+	void __iomem		*regs;
+	void __iomem		*set_data;
+	void __iomem		*clr_data;
+	void __iomem		*in_data;
+	int			gpio_unbanked;
+	unsigned		gpio_irq;
+};
+
+/*
+ * basic gpio routines
+ */
+#define	GPIO(X)		(X)	/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
+
+/* Convert GPIO signal to GPIO pin number */
+#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
+
+static inline u32 __gpio_mask(unsigned gpio)
+{
+	return 1 << (gpio % 32);
+}
+#endif
diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig
index 2eea1840315d..37459dfd168d 100644
--- a/sound/soc/samsung/Kconfig
+++ b/sound/soc/samsung/Kconfig
@@ -2,7 +2,7 @@ config SND_SOC_SAMSUNG
 	tristate "ASoC support for Samsung"
 	depends on PLAT_SAMSUNG
 	select S3C64XX_DMA if ARCH_S3C64XX
-	select S3C2410_DMA if ARCH_S3C24XX
+	select S3C24XX_DMA if ARCH_S3C24XX
 	help
 	  Say Y or M if you want to add support for codecs attached to
 	  the Samsung SoCs' Audio interfaces. You will also need to