From c64e38ea17a81721da0393584fd807f8434050fa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Date: Mon, 1 Nov 2010 14:32:27 -0400 Subject: [PATCH 001/179] xen/blkfront: map REQ_FLUSH into a full barrier Implement a flush as a full barrier, since we have nothing weaker. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Acked-by: Christoph Hellwig <hch@lst.de> --- drivers/block/xen-blkfront.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 06e2812ba124..3a318d8576c5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, } /* - * blkif_queue_request + * Generate a Xen blkfront IO request from a blk layer request. Reads + * and writes are handled as expected. Since we lack a loose flush + * request, we map flushes into a full ordered barrier. * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. + * @req: a request struct */ static int blkif_queue_request(struct request *req) { @@ -289,7 +286,7 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_HARDBARRIER) + if (req->cmd_flags & REQ_FLUSH) ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -1069,14 +1066,8 @@ static void blkfront_connect(struct blkfront_info *info) */ info->feature_flush = 0; - /* - * The driver doesn't properly handled empty flushes, so - * lets disable barrier support for now. - */ -#if 0 if (!err && barrier) info->feature_flush = REQ_FLUSH; -#endif err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { From a945b9801a9bfd4a98bcfd9f6656b5027b254e3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Date: Mon, 1 Nov 2010 17:03:14 -0400 Subject: [PATCH 002/179] xen/blkfront: change blk_shadow.request to proper pointer Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> --- drivers/block/xen-blkfront.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3a318d8576c5..31c8a643d109 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -65,7 +65,7 @@ enum blkif_state { struct blk_shadow { struct blkif_request req; - unsigned long request; + struct request *request; unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { info->shadow[id].req.id = info->shadow_free; - info->shadow[id].request = 0; + info->shadow[id].request = NULL; info->shadow_free = id; } @@ -278,7 +278,7 @@ static int blkif_queue_request(struct request *req) /* Fill out a communications ring structure. */ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); id = get_id_from_freelist(info); - info->shadow[id].request = (unsigned long)req; + info->shadow[id].request = req; ring_req->id = id; ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); @@ -633,7 +633,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; - req = (struct request *)info->shadow[id].request; + req = info->shadow[id].request; blkif_completion(&info->shadow[id]); @@ -898,7 +898,7 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 3: Find pending requests and requeue them. */ for (i = 0; i < BLK_RING_SIZE; i++) { /* Not in use? */ - if (copy[i].request == 0) + if (!copy[i].request) continue; /* Grab a request slot and copy shadow state into it. */ @@ -915,9 +915,7 @@ static int blkif_recover(struct blkfront_info *info) req->seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->id].frame[j]), - rq_data_dir( - (struct request *) - info->shadow[req->id].request)); + rq_data_dir(info->shadow[req->id].request)); info->shadow[req->id].req = *req; info->ring.req_prod_pvt++; From be2f8373c188ed1f5d36003c9928e4d695213080 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Date: Tue, 2 Nov 2010 10:38:33 -0400 Subject: [PATCH 003/179] xen/blkfront: Implement FUA with BLKIF_OP_WRITE_BARRIER The BLKIF_OP_WRITE_BARRIER is a full ordered barrier, so we can use it to implement FUA as well as a plain FLUSH. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Acked-by: Christoph Hellwig <hch@lst.de> --- drivers/block/xen-blkfront.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 31c8a643d109..76b874a79175 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -286,8 +286,18 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_FLUSH) + + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + /* + * Ideally we could just do an unordered + * flush-to-disk, but all we have is a full write + * barrier at the moment. However, a barrier write is + * a superset of FUA, so we can implement it the same + * way. (It's also a FLUSH+FUA, since it is + * guaranteed ordered WRT previous writes.) + */ ring_req->operation = BLKIF_OP_WRITE_BARRIER; + } ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); @@ -1065,7 +1075,7 @@ static void blkfront_connect(struct blkfront_info *info) info->feature_flush = 0; if (!err && barrier) - info->feature_flush = REQ_FLUSH; + info->feature_flush = REQ_FLUSH | REQ_FUA; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { From dcb8baeceaa1c629bbd06f472cea023ad08a0c33 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Date: Tue, 2 Nov 2010 11:55:58 -0400 Subject: [PATCH 004/179] xen/blkfront: cope with backend that fail empty BLKIF_OP_WRITE_BARRIER requests Some(?) Xen block backends fail BLKIF_OP_WRITE_BARRIER requests, which Linux uses as a cache flush operation. In that case, disable use of FLUSH. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/xen-blkfront.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 76b874a79175..4f9e22f29138 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -656,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; + } + if (unlikely(bret->status == BLKIF_RSP_ERROR && + info->shadow[id].req.nr_segments == 0)) { + printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n", + info->gd->disk_name); + error = -EOPNOTSUPP; + } + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } From 25d5c699f983a2da51f5165eb9a8fc6338124b6c Mon Sep 17 00:00:00 2001 From: Philip Rakity <prakity@marvell.com> Date: Sun, 7 Nov 2010 16:22:28 -0500 Subject: [PATCH 005/179] mmc: Fix printing of card DDR type We should not call mmc_card_set_ddr_mode() if we are in single data mode. This sets DDR and causes the kernel log to say the card is DDR when it is not. Explicitly set ddr to 0 rather then rely on MMC_SDR_MODE being 0 when doing the checks. Signed-off-by: Philip Rakity <prakity@marvell.com> Acked-by: Linus Walleij <linus.walleij@stericsson.com> Acked-by: Kyungmin Park <kyungmin.park@samsung.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/core/mmc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 995261f7fd70..77f93c3b8808 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -375,7 +375,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *oldcard) { struct mmc_card *card; - int err, ddr = MMC_SDR_MODE; + int err, ddr = 0; u32 cid[4]; unsigned int max_dtr; @@ -562,7 +562,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, 1 << bus_width, ddr); err = 0; } else { - mmc_card_set_ddr_mode(card); + if (ddr) + mmc_card_set_ddr_mode(card); + else + ddr = MMC_SDR_MODE; + mmc_set_bus_width_ddr(card->host, bus_width, ddr); } } From 14d4031d21d8a63ad84e5ab9198d0503efabc780 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 4 Nov 2010 13:59:11 +0800 Subject: [PATCH 006/179] mmc: ushc: Return proper error code for ushc_probe() Improves error handling in the ushc driver. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: David Vrabel <david.vrabel@csr.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/ushc.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index b4ead4a13c98..f8f65df9b017 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -425,7 +425,7 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct usb_device *usb_dev = interface_to_usbdev(intf); struct mmc_host *mmc; struct ushc_data *ushc; - int ret = -ENOMEM; + int ret; mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) @@ -462,11 +462,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id mmc->max_blk_count = 511; ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->int_urb == NULL) + if (ushc->int_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); - if (ushc->int_data == NULL) + if (ushc->int_data == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, usb_rcvintpipe(usb_dev, intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), @@ -475,11 +479,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id intf->cur_altsetting->endpoint[0].desc.bInterval); ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->cbw_urb == NULL) + if (ushc->cbw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->cbw == NULL) + if (ushc->cbw == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw->signature = USHC_CBW_SIGNATURE; usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), @@ -487,15 +495,21 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id cbw_callback, ushc); ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->data_urb == NULL) + if (ushc->data_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->csw_urb == NULL) + if (ushc->csw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->csw == NULL) + if (ushc->csw == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), ushc->csw, sizeof(struct ushc_csw), csw_callback, ushc); From 5f619704d18b93869d045abc49e09cdba109b04b Mon Sep 17 00:00:00 2001 From: Daniel Drake <dsd@laptop.org> Date: Thu, 4 Nov 2010 22:20:39 +0000 Subject: [PATCH 007/179] mmc: sdhci: Properly enable SDIO IRQ wakeups A little more work was needed for SDIO IRQ wakeups to be functional. Wake-on-WLAN on the SD WiFi adapter in the XO-1.5 laptop is now working. Signed-off-by: Daniel Drake <dsd@laptop.org> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/sdhci-pci.c | 11 +++++++++-- drivers/mmc/host/sdhci.c | 10 ++++++++++ drivers/mmc/host/sdhci.h | 4 ++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 55746bac2f44..d196e77a93dc 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -637,6 +637,7 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) { struct sdhci_pci_chip *chip; struct sdhci_pci_slot *slot; + mmc_pm_flag_t slot_pm_flags; mmc_pm_flag_t pm_flags = 0; int i, ret; @@ -657,7 +658,11 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) return ret; } - pm_flags |= slot->host->mmc->pm_flags; + slot_pm_flags = slot->host->mmc->pm_flags; + if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ) + sdhci_enable_irq_wakeups(slot->host); + + pm_flags |= slot_pm_flags; } if (chip->fixes && chip->fixes->suspend) { @@ -671,8 +676,10 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) pci_save_state(pdev); if (pm_flags & MMC_PM_KEEP_POWER) { - if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) + if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) { + pci_pme_active(pdev, true); pci_enable_wake(pdev, PCI_D3hot, 1); + } pci_set_power_state(pdev, PCI_D3hot); } else { pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 782c0ee3c925..154cbf83c1ab 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1681,6 +1681,16 @@ int sdhci_resume_host(struct sdhci_host *host) EXPORT_SYMBOL_GPL(sdhci_resume_host); +void sdhci_enable_irq_wakeups(struct sdhci_host *host) +{ + u8 val; + val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); + val |= SDHCI_WAKE_ON_INT; + sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); +} + +EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups); + #endif /* CONFIG_PM */ /*****************************************************************************\ diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index b7b8a3b28b01..d52a7163b97a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -87,6 +87,9 @@ #define SDHCI_BLOCK_GAP_CONTROL 0x2A #define SDHCI_WAKE_UP_CONTROL 0x2B +#define SDHCI_WAKE_ON_INT 0x01 +#define SDHCI_WAKE_ON_INSERT 0x02 +#define SDHCI_WAKE_ON_REMOVE 0x04 #define SDHCI_CLOCK_CONTROL 0x2C #define SDHCI_DIVIDER_SHIFT 8 @@ -317,6 +320,7 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead); #ifdef CONFIG_PM extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); extern int sdhci_resume_host(struct sdhci_host *host); +extern void sdhci_enable_irq_wakeups(struct sdhci_host *host); #endif #endif /* __SDHCI_HW_H */ From 37865fe91582582a6f6c00652f6a2b1ff71f8a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= <eric@eukrea.com> Date: Sat, 23 Oct 2010 01:57:21 +0200 Subject: [PATCH 008/179] mmc: sdhci-esdhc-imx: fix timeout on i.MX's sdhci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes timeout problems on i.MX's sdhci as suggested by Richard Zhu. Tested on: - i.MX257: not needed - i.MX357: needed - i.MX515: needed More details can be found here: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-October/029748.html Signed-off-by: Eric Bénard <eric@eukrea.com> Tested-by: Shawn Guo <shawn.gsc@gmail.com> Acked-by: Wolfram Sang <w.sang@pengutronix.de> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/sdhci-esdhc-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 2e9cca19c90b..28e63ef58d0f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -17,6 +17,7 @@ #include <linux/clk.h> #include <linux/mmc/host.h> #include <linux/mmc/sdhci-pltfm.h> +#include <mach/hardware.h> #include "sdhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -112,6 +113,9 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd clk_enable(clk); pltfm_host->clk = clk; + if (cpu_is_mx35() || cpu_is_mx51()) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + return 0; } From 16a790bcce87740d219b7227eaa4df72804097ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= <eric@eukrea.com> Date: Sat, 23 Oct 2010 01:57:22 +0200 Subject: [PATCH 009/179] mmc: sdhci-esdhc-imx: enable QUIRK_NO_MULTIBLOCK only for i.MX25 and i.MX35 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only these CPUs list the bug in their errata. Signed-off-by: Eric Bénard <eric@eukrea.com> Acked-by: Wolfram Sang <w.sang@pengutronix.de> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/sdhci-esdhc-imx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 28e63ef58d0f..9b82910b9dbb 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -116,6 +116,10 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd if (cpu_is_mx35() || cpu_is_mx51()) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + /* Fix errata ENGcm07207 which is present on i.MX25 and i.MX35 */ + if (cpu_is_mx25() || cpu_is_mx35()) + host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK; + return 0; } @@ -137,10 +141,8 @@ static struct sdhci_ops sdhci_esdhc_ops = { }; struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_NO_MULTIBLOCK - | SDHCI_QUIRK_BROKEN_ADMA, + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA, /* ADMA has issues. Might be fixable */ - /* NO_MULTIBLOCK might be MX35 only (Errata: ENGcm07207) */ .ops = &sdhci_esdhc_ops, .init = esdhc_pltfm_init, .exit = esdhc_pltfm_exit, From 35ac6f081f26e1b6b3482b9c8dfccebe7817c691 Mon Sep 17 00:00:00 2001 From: Jacob Pan <jacob.jun.pan@linux.intel.com> Date: Tue, 9 Nov 2010 13:57:29 +0000 Subject: [PATCH 010/179] mmc: sdhci: Fix crash on boot with C0 stepping Moorestown platforms SDHC2 is newly added in C0 stepping of Langwell. Without the Moorestown specific quirk, the default pci_probe will be called and crash the kernel. This patch unblocks the crash problem on C0 by using the same probing function as HC1, which limits the number of slots to one. Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com> Signed-off-by: Alan Cox <alan@linux.intel.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/sdhci-pci.c | 20 ++++++++++++++------ include/linux/pci_ids.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index d196e77a93dc..3d9c2460d437 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -149,11 +149,11 @@ static const struct sdhci_pci_fixes sdhci_cafe = { * ADMA operation is disabled for Moorestown platform due to * hardware bugs. */ -static int mrst_hc1_probe(struct sdhci_pci_chip *chip) +static int mrst_hc_probe(struct sdhci_pci_chip *chip) { /* - * slots number is fixed here for MRST as SDIO3 is never used and has - * hardware bugs. + * slots number is fixed here for MRST as SDIO3/5 are never used and + * have hardware bugs. */ chip->num_slots = 1; return 0; @@ -163,9 +163,9 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, }; -static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1 = { +static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, - .probe = mrst_hc1_probe, + .probe = mrst_hc_probe, }; static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { @@ -538,7 +538,15 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MRST_SD1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_MRST_SD2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, }, { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c6bcfe93b9ca..d369b533dc2a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2441,6 +2441,7 @@ #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 +#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 From c0deae8c9587419ab13874b74425ce2eb2e18508 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Date: Wed, 3 Nov 2010 18:52:56 +0200 Subject: [PATCH 011/179] posix-cpu-timers: Rcu_read_lock/unlock protect find_task_by_vpid call Commit 4221a9918e38b7494cee341dda7b7b4bb8c04bde "Add RCU check for find_task_by_vpid()" introduced rcu_lockdep_assert to find_task_by_pid_ns. Add rcu_read_lock/rcu_read_unlock to call find_task_by_vpid. Tetsuo Handa wrote: | Quoting from one of posts in that thead | http://kerneltrap.org/mailarchive/linux-kernel/2010/2/8/4536388 | || Usually tasklist gives enough protection, but if copy_process() fails || it calls free_pid() lockless and does call_rcu(delayed_put_pid(). || This means, without rcu lock find_pid_ns() can't scan the hash table || safely. Thomas Gleixner wrote: | We can remove the tasklist_lock while at it. rcu_read_lock is enough. Patch also replaces thread_group_leader with has_group_leader_pid in accordance to comment by Oleg Nesterov: | ... thread_group_leader() check is not relaible without | tasklist. If we race with de_thread() find_task_by_vpid() can find | the new leader before it updates its ->group_leader. | | perhaps it makes sense to change posix_cpu_timer_create() to use | has_group_leader_pid() instead, just to make this code not look racy | and avoid adding new problems. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stanislaw Gruszka <sgruszka@redhat.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> LKML-Reference: <20101103165256.GD30053@swordfish.minsk.epam.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/posix-cpu-timers.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba5879..05bb7173850e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock) if (pid == 0) return 0; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_vpid(pid); if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? - same_thread_group(p, current) : thread_group_leader(p))) { + same_thread_group(p, current) : has_group_leader_pid(p))) { error = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } @@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) INIT_LIST_HEAD(&new_timer->it.cpu.entry); - read_lock(&tasklist_lock); + rcu_read_lock(); if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { if (pid == 0) { p = current; @@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) p = current->group_leader; } else { p = find_task_by_vpid(pid); - if (p && !thread_group_leader(p)) + if (p && !has_group_leader_pid(p)) p = NULL; } } @@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) } else { ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } From 13b9b6e746d753d43270a78dd39694912646b5d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <srostedt@redhat.com> Date: Wed, 10 Nov 2010 22:19:24 -0500 Subject: [PATCH 012/179] tracing: Fix module use of trace_bprintk() On use of trace_printk() there's a macro that determines if the format is static or a variable. If it is static, it defaults to __trace_bprintk() otherwise it uses __trace_printk(). A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we discussed a way to allow modules to use it. The difference between __trace_bprintk() and __trace_printk() is that for faster processing, just the format and args are stored in the trace instead of running it through a sprintf function. In order to do this, the format used by the __trace_bprintk() had to be persistent. See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec The problem comes with trace_bprintk() where the module is unloaded. The pointer left in the buffer is still pointing to the format. To solve this issue, the formats in the module were copied into kernel core. If the same format was used, they would use the same copy (to prevent memory leak). This all worked well until we tried to merge everything. At the time this was written, Lai Jiangshan, Frederic Weisbecker, Ingo Molnar and myself were all touching the same code. When this was merged, we lost the part of it that was in module.c. This kept out the copying of the formats and unloading the module could cause bad pointers left in the ring buffer. This patch adds back (with updates required for current kernel) the module code that sets up the necessary pointers. Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524a..d190664f25ff 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * mod->num_trace_events, GFP_KERNEL); #endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_bprintk_fmt_start, + sizeof(*mod->trace_bprintk_fmt_start) * + mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, "__mcount_loc", From b5908548537ccd3ada258ca5348df7ffc93e5a06 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <srostedt@redhat.com> Date: Wed, 10 Nov 2010 22:29:49 -0500 Subject: [PATCH 013/179] tracing: Force arch_local_irq_* notrace for paravirt When running ktest.pl randconfig tests, I would sometimes trigger a lockdep annotation bug (possible reason: unannotated irqs-on). This triggering happened right after function tracer self test was executed. After doing a config bisect I found that this was caused with having function tracer, paravirt guest, prove locking, and rcu torture all enabled. The rcu torture just enhanced the likelyhood of triggering the bug. Prove locking was needed, since it was the thing that was bugging. Function tracer would trace and disable interrupts in all sorts of funny places. paravirt guest would turn arch_local_irq_* into functions that would be traced. Besides the fact that tracing arch_local_irq_* is just a bad idea, this is what is happening. The bug happened simply in the local_irq_restore() code: if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ } else { \ trace_hardirqs_on(); \ raw_local_irq_restore(flags); \ } \ The raw_local_irq_restore() was defined as arch_local_irq_restore(). Now imagine, we are about to enable interrupts. We go into the else case and call trace_hardirqs_on() which tells lockdep that we are enabling interrupts, so it sets the current->hardirqs_enabled = 1. Then we call raw_local_irq_restore() which calls arch_local_irq_restore() which gets traced! Now in the function tracer we disable interrupts with local_irq_save(). This is fine, but flags is stored that we have interrupts disabled. When the function tracer calls local_irq_restore() it does it, but this time with flags set as disabled, so we go into the if () path. This keeps interrupts disabled and calls trace_hardirqs_off() which sets current->hardirqs_enabled = 0. When the tracer is finished and proceeds with the original code, we enable interrupts but leave current->hardirqs_enabled as 0. Which now breaks lockdeps internal processing. Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86/include/asm/paravirt.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a8709f..ef9975812c77 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } -static inline void arch_local_irq_restore(unsigned long f) +static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_disable); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_enable); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; From d9bcbf343ec63e1104b5276195888ee06b4d086f Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski <g.liakhovetski@gmx.de> Date: Thu, 11 Nov 2010 17:32:25 +0100 Subject: [PATCH 014/179] mmc: fix rmmod race for hosts using card-detection polling MMC hosts that poll for card detection by defining the MMC_CAP_NEEDS_POLL flag have a race on rmmod, where the delayed work is cancelled without waiting for completed polling. To prevent this a _sync version of the work cancellation has to be used. Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> Cc: <stable@kernel.org> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/core/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 8f86d702e46e..31ae07a36576 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1559,7 +1559,7 @@ void mmc_stop_host(struct mmc_host *host) if (host->caps & MMC_CAP_DISABLE) cancel_delayed_work(&host->disable); - cancel_delayed_work(&host->detect); + cancel_delayed_work_sync(&host->detect); mmc_flush_scheduled_work(); /* clear pm flags now and let card drivers set them as needed */ From 6c0aca288e726405b01dacb12cac556454d34b2a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker <fweisbec@gmail.com> Date: Thu, 11 Nov 2010 21:18:43 +0100 Subject: [PATCH 015/179] x86: Ignore trap bits on single step exceptions When a single step exception fires, the trap bits, used to signal hardware breakpoints, are in a random state. These trap bits might be set if another exception will follow, like a breakpoint in the next instruction, or a watchpoint in the previous one. Or there can be any junk there. So if we handle these trap bits during the single step exception, we are going to handle an exception twice, or we are going to handle junk. Just ignore them in this case. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=21332 Reported-by: Michael Stefaniuc <mstefani@redhat.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Rafael J. Wysocki <rjw@sisk.pl> Cc: Maciej Rutecki <maciej.rutecki@gmail.com> Cc: Alexandre Julliard <julliard@winehq.org> Cc: Jason Wessel <jason.wessel@windriver.com> Cc: All since 2.6.33.x <stable@kernel.org> --- arch/x86/kernel/hw_breakpoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9dcc25d..42c594254507 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; + /* If it's a single step, TRAP bits are random */ + if (dr6 & DR_STEP) + return NOTIFY_DONE; + /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; From 3c502e7a0255d82621ff25d60cc816624830497e Mon Sep 17 00:00:00 2001 From: Jason Wessel <jason.wessel@windriver.com> Date: Thu, 4 Nov 2010 17:33:01 -0500 Subject: [PATCH 016/179] perf,hw_breakpoint: Initialize hardware api earlier When using early debugging, the kernel does not initialize the hw_breakpoint API early enough and causes the late initialization of the kernel debugger to fail. The boot arguments are: earlyprintk=vga ekgdboc=kbd kgdbwait Then simply type "go" at the kdb prompt and boot. The kernel will later emit the message: kgdb: Could not allocate hwbreakpoints And at that point the kernel debugger will cease to work correctly. The solution is to initialize the hw_breakpoint at the same time that all the other perf call backs are initialized instead of using a core_initcall() initialization which happens well after the kernel debugger can make use of hardware breakpoints. Signed-off-by: Jason Wessel <jason.wessel@windriver.com> CC: Frederic Weisbecker <fweisbec@gmail.com> CC: Ingo Molnar <mingo@elte.hu> CC: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <4CD3396D.1090308@windriver.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> --- include/linux/hw_breakpoint.h | 4 ++++ kernel/hw_breakpoint.c | 3 +-- kernel/perf_event.c | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index a2d6ea49ec56..d1e55fed2c7d 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -33,6 +33,8 @@ enum bp_type_idx { #ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int __init init_hw_breakpoint(void); + static inline void hw_breakpoint_init(struct perf_event_attr *attr) { memset(attr, 0, sizeof(*attr)); @@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ +static inline int __init init_hw_breakpoint(void) { return 0; } + static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afca..e5325825aeb6 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = { .read = hw_breakpoint_pmu_read, }; -static int __init init_hw_breakpoint(void) +int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; int cpu, err_cpu; @@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void) return -ENOMEM; } -core_initcall(init_hw_breakpoint); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f4982..05b7d8c72c6c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,6 +31,7 @@ #include <linux/kernel_stat.h> #include <linux/perf_event.h> #include <linux/ftrace_event.h> +#include <linux/hw_breakpoint.h> #include <asm/irq_regs.h> @@ -6295,6 +6296,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) void __init perf_event_init(void) { + int ret; + perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent); @@ -6302,4 +6305,7 @@ void __init perf_event_init(void) perf_pmu_register(&perf_task_clock); perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); + + ret = init_hw_breakpoint(); + WARN(ret, "hw_breakpoint initialization failed with: %d", ret); } From 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt <srostedt@redhat.com> Date: Wed, 10 Nov 2010 12:56:12 +0100 Subject: [PATCH 017/179] tracing: Fix recursive user stack trace The user stack trace can fault when examining the trace. Which would call the do_page_fault handler, which would trace again, which would do the user stack trace, which would fault and call do_page_fault again ... Thus this is causing a recursive bug. We need to have a recursion detector here. [ Resubmitted by Jiri Olsa ] [ Eric Dumazet recommended using __this_cpu_* instead of __get_cpu_* ] Cc: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Jiri Olsa <jolsa@redhat.com> LKML-Reference: <1289390172-9730-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- kernel/trace/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..ee6a7339cf0e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1284,6 +1284,8 @@ void trace_dump_stack(void) __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } +static DEFINE_PER_CPU(int, user_stack_count); + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (unlikely(in_nmi())) return; + /* + * prevent recursion, since the user stack tracing may + * trigger other kernel events. + */ + preempt_disable(); + if (__this_cpu_read(user_stack_count)) + goto out; + + __this_cpu_inc(user_stack_count); + + + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) @@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + + __this_cpu_dec(user_stack_count); + + out: + preempt_enable(); } #ifdef UNUSED From d69b78ba1deaaa95ffa8dac5a9ca819ce454d52e Mon Sep 17 00:00:00 2001 From: Greg Thelen <gthelen@google.com> Date: Mon, 15 Nov 2010 10:20:52 +0100 Subject: [PATCH 018/179] ioprio: grab rcu_read_lock in sys_ioprio_{set,get}() Using: - CONFIG_LOCKUP_DETECTOR=y - CONFIG_PREEMPT=y - CONFIG_LOCKDEP=y - CONFIG_PROVE_LOCKING=y - CONFIG_PROVE_RCU=y found a missing rcu lock during boot on a 512 MiB x86_64 ubuntu vm: =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:419 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by ureadahead/1355: #0: (tasklist_lock){.+.+..}, at: [<ffffffff8115bc09>] sys_ioprio_set+0x7f/0x29e stack backtrace: Pid: 1355, comm: ureadahead Not tainted 2.6.37-dbg-DEV #1 Call Trace: [<ffffffff8109c10c>] lockdep_rcu_dereference+0xaa/0xb3 [<ffffffff81088cbf>] find_task_by_pid_ns+0x44/0x5d [<ffffffff81088cfa>] find_task_by_vpid+0x22/0x24 [<ffffffff8115bc3e>] sys_ioprio_set+0xb4/0x29e [<ffffffff8147cf21>] ? trace_hardirqs_off_thunk+0x3a/0x3c [<ffffffff8105c409>] sysenter_dispatch+0x7/0x2c [<ffffffff8147cee2>] ? trace_hardirqs_on_thunk+0x3a/0x3f The fix is to: a) grab rcu lock in sys_ioprio_{set,get}() and b) avoid grabbing tasklist_lock. Discussion in: http://marc.info/?l=linux-kernel&m=128951324702889 Signed-off-by: Greg Thelen <gthelen@google.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Modified by Jens to remove the now redundant inner rcu lock and unlock since they are now protected by the outer lock. Signed-off-by: Jens Axboe <jaxboe@fusionio.com> --- fs/ioprio.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/fs/ioprio.c b/fs/ioprio.c index 2f7d05c89922..7da2a06508e5 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) } ret = -ESRCH; - /* - * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", - * so we can't use rcu_read_lock(). See re-copy of ->ioprio - * in copy_process(). - */ - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = set_task_ioprio(p, ioprio); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == who; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -160,7 +148,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } @@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) int ret = -ESRCH; int tmpio; - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = get_task_ioprio(p); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == user->uid; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } From c2f6805d470af369a7337801deeecea800dbfe1c Mon Sep 17 00:00:00 2001 From: Vivek Goyal <vgoyal@redhat.com> Date: Mon, 15 Nov 2010 19:32:42 +0100 Subject: [PATCH 019/179] blk-throttle: Fix calculation of max number of WRITES to be dispatched o Currently we try to dispatch more READS and less WRITES (75%, 25%) in one dispatch round. ummy pointed out that there is a bug in max_nr_writes calculation. This patch fixes it. Reported-by: ummy y <yummylln@yahoo.com.cn> Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com> --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 56ad4531b412..004be80fd894 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, { unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = throtl_grp_quantum*3/4; - unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; + unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; struct bio *bio; /* Try to dispatch 75% READS and 25% WRITES */ From 3e9bb2a071614f1d185740f31ac503ecba11d783 Mon Sep 17 00:00:00 2001 From: Vivek Goyal <vgoyal@redhat.com> Date: Mon, 15 Nov 2010 19:32:43 +0100 Subject: [PATCH 020/179] block: fix amiga and atari floppy driver compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geert, my crosstool don't produce warning below. I guess this has to do something with compiler version. - Geert noticed following warning during compilation. drivers/block/amiflop.c:1344: warning: ‘rq’ may be used uninitialized in this function drivers/block/ataflop.c:1402: warning: ‘rq’ may be used uninitialized in this function - Initialize rq to NULL to fix the warning. If we can't find a suitable request to dispatch, this function should return NULL instead of a possibly garbage pointer. - Cross compile tested only. Don't have hardware to test it. Reported-by: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com> --- drivers/block/amiflop.c | 2 +- drivers/block/ataflop.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a1725e6488d3..7888501ad9ee 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1341,7 +1341,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int cnt = FD_MAX_UNITS; - struct request *rq; + struct request *rq = NULL; /* Find next queue we can dispatch from */ fdc_queue = fdc_queue + 1; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4e4cc6c828cb..605a67e40bbf 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1399,7 +1399,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int old_pos = fdc_queue; - struct request *rq; + struct request *rq = NULL; do { q = unit[fdc_queue].disk->queue; From 902bca00dc6e3b3ff5fbb1e32e5dbb45d5f30579 Mon Sep 17 00:00:00 2001 From: Stefan Richter <stefanr@s5r6.in-berlin.de> Date: Sat, 6 Nov 2010 12:36:13 +0100 Subject: [PATCH 021/179] firewire: net: count stats.tx_packets and stats.tx_bytes Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> --- drivers/firewire/net.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 18fdd9703b48..e2e968e732bc 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -906,6 +906,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask); static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) { struct fwnet_device *dev = ptask->dev; + struct sk_buff *skb = ptask->skb; unsigned long flags; bool free; @@ -916,8 +917,11 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); - if (ptask->outstanding_pkts == 0) + if (ptask->outstanding_pkts == 0) { list_del(&ptask->pt_link); + dev->netdev->stats.tx_packets++; + dev->netdev->stats.tx_bytes += skb->len; + } spin_unlock_irqrestore(&dev->lock, flags); @@ -926,7 +930,6 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) u16 fg_off; u16 datagram_label; u16 lf; - struct sk_buff *skb; /* Update the ptask to point to the next fragment and send it */ lf = fwnet_get_hdr_lf(&ptask->hdr); @@ -953,7 +956,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) datagram_label = fwnet_get_hdr_dgl(&ptask->hdr); break; } - skb = ptask->skb; + skb_pull(skb, ptask->max_payload); if (ptask->outstanding_pkts > 1) { fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG, From 7ee11fa8d0a84b05cefe12b0bebc05ab0ea89cd6 Mon Sep 17 00:00:00 2001 From: Stefan Richter <stefanr@s5r6.in-berlin.de> Date: Sat, 6 Nov 2010 16:57:28 +0100 Subject: [PATCH 022/179] firewire: net: fix memory leaks a) fwnet_transmit_packet_done used to poison ptask->pt_link by list_del. If fwnet_send_packet checked later whether it was responsible to clean up (in the border case that the TX soft IRQ was outpaced by the AT-req tasklet on another CPU), it missed this because ptask->pt_link was no longer shown as empty. b) If fwnet_write_complete got an rcode other than RCODE_COMPLETE, we missed to free the skb and ptask entirely. Also, count stats.tx_dropped and stats.tx_errors when rcode != 0. Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> --- drivers/firewire/net.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index e2e968e732bc..3a27cee5bf26 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -916,9 +916,10 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); + if (free) + list_del(&ptask->pt_link); if (ptask->outstanding_pkts == 0) { - list_del(&ptask->pt_link); dev->netdev->stats.tx_packets++; dev->netdev->stats.tx_bytes += skb->len; } @@ -973,6 +974,31 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) fwnet_free_ptask(ptask); } +static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) +{ + struct fwnet_device *dev = ptask->dev; + unsigned long flags; + bool free; + + spin_lock_irqsave(&dev->lock, flags); + + /* One fragment failed; don't try to send remaining fragments. */ + ptask->outstanding_pkts = 0; + + /* Check whether we or the networking TX soft-IRQ is last user. */ + free = !list_empty(&ptask->pt_link); + if (free) + list_del(&ptask->pt_link); + + dev->netdev->stats.tx_dropped++; + dev->netdev->stats.tx_errors++; + + spin_unlock_irqrestore(&dev->lock, flags); + + if (free) + fwnet_free_ptask(ptask); +} + static void fwnet_write_complete(struct fw_card *card, int rcode, void *payload, size_t length, void *data) { @@ -980,11 +1006,12 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, ptask = data; - if (rcode == RCODE_COMPLETE) + if (rcode == RCODE_COMPLETE) { fwnet_transmit_packet_done(ptask); - else + } else { fw_error("fwnet_write_complete: failed: %x\n", rcode); - /* ??? error recovery */ + fwnet_transmit_packet_failed(ptask); + } } static int fwnet_send_packet(struct fwnet_packet_task *ptask) From 48553011cea504796e513350740781ac6745f556 Mon Sep 17 00:00:00 2001 From: Stefan Richter <stefanr@s5r6.in-berlin.de> Date: Sat, 6 Nov 2010 23:18:23 +0100 Subject: [PATCH 023/179] firewire: net: replace lists by counters The current transmit code does not at all make use of - fwnet_device.packet_list and only very limited use of - fwnet_device.broadcasted_list, - fwnet_device.queued_packets. Their current function is to track whether the TX soft-IRQ finished dealing with an skb when the AT-req tasklet takes over, and to discard pending tx datagrams (if there are any) when the local node is removed. The latter does actually contain a race condition bug with TX soft-IRQ and AT-req tasklet. Instead of these lists and the corresponding link in fwnet_packet_task, - a flag in fwnet_packet_task to track whether fwnet_tx is done, - a counter of queued datagrams in fwnet_device do the job as well. The above mentioned theoretic race condition is resolved by letting fwnet_remove sleep until all datagrams were flushed. It may sleep almost arbitrarily long since fwnet_remove is executed in the context of a multithreaded (concurrency managed) workqueue. The type of max_payload is changed to u16 here to avoid waste in struct fwnet_packet_task. This value cannot exceed 4096 per IEEE 1394:2008 table 16-18 (or 32678 per specification of packet headers, if there is ever going to be something else than beta mode). Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> --- drivers/firewire/net.c | 73 +++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 3a27cee5bf26..d2d488b9cd96 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -7,6 +7,7 @@ */ #include <linux/bug.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/firewire.h> #include <linux/firewire-constants.h> @@ -169,15 +170,8 @@ struct fwnet_device { struct fw_address_handler handler; u64 local_fifo; - /* List of packets to be sent */ - struct list_head packet_list; - /* - * List of packets that were broadcasted. When we get an ISO interrupt - * one of them has been sent - */ - struct list_head broadcasted_list; - /* List of packets that have been sent but not yet acked */ - struct list_head sent_list; + /* Number of tx datagrams that have been queued but not yet acked */ + int queued_datagrams; struct list_head peer_list; struct fw_card *card; @@ -195,7 +189,7 @@ struct fwnet_peer { unsigned pdg_size; /* pd_list size */ u16 datagram_label; /* outgoing datagram label */ - unsigned max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ + u16 max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ int node_id; int generation; unsigned speed; @@ -203,22 +197,18 @@ struct fwnet_peer { /* This is our task struct. It's used for the packet complete callback. */ struct fwnet_packet_task { - /* - * ptask can actually be on dev->packet_list, dev->broadcasted_list, - * or dev->sent_list depending on its current state. - */ - struct list_head pt_link; struct fw_transaction transaction; struct rfc2734_header hdr; struct sk_buff *skb; struct fwnet_device *dev; int outstanding_pkts; - unsigned max_payload; u64 fifo_addr; u16 dest_node; + u16 max_payload; u8 generation; u8 speed; + u8 enqueued; }; /* @@ -915,9 +905,9 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) ptask->outstanding_pkts--; /* Check whether we or the networking TX soft-IRQ is last user. */ - free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); + free = (ptask->outstanding_pkts == 0 && ptask->enqueued); if (free) - list_del(&ptask->pt_link); + dev->queued_datagrams--; if (ptask->outstanding_pkts == 0) { dev->netdev->stats.tx_packets++; @@ -986,9 +976,9 @@ static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) ptask->outstanding_pkts = 0; /* Check whether we or the networking TX soft-IRQ is last user. */ - free = !list_empty(&ptask->pt_link); + free = ptask->enqueued; if (free) - list_del(&ptask->pt_link); + dev->queued_datagrams--; dev->netdev->stats.tx_dropped++; dev->netdev->stats.tx_errors++; @@ -1069,9 +1059,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); /* If the AT tasklet already ran, we may be last user. */ - free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); + free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) - list_add_tail(&ptask->pt_link, &dev->broadcasted_list); + ptask->enqueued = true; + else + dev->queued_datagrams--; spin_unlock_irqrestore(&dev->lock, flags); @@ -1086,9 +1078,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); /* If the AT tasklet already ran, we may be last user. */ - free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); + free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) - list_add_tail(&ptask->pt_link, &dev->sent_list); + ptask->enqueued = true; + else + dev->queued_datagrams--; spin_unlock_irqrestore(&dev->lock, flags); @@ -1350,10 +1344,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload += RFC2374_FRAG_HDR_SIZE; } + dev->queued_datagrams++; + spin_unlock_irqrestore(&dev->lock, flags); ptask->max_payload = max_payload; - INIT_LIST_HEAD(&ptask->pt_link); + ptask->enqueued = 0; fwnet_send_packet(ptask); @@ -1487,14 +1483,9 @@ static int fwnet_probe(struct device *_dev) dev->broadcast_rcv_context = NULL; dev->broadcast_xmt_max_payload = 0; dev->broadcast_xmt_datagramlabel = 0; - dev->local_fifo = FWNET_NO_FIFO_ADDR; - - INIT_LIST_HEAD(&dev->packet_list); - INIT_LIST_HEAD(&dev->broadcasted_list); - INIT_LIST_HEAD(&dev->sent_list); + dev->queued_datagrams = 0; INIT_LIST_HEAD(&dev->peer_list); - dev->card = card; dev->netdev = net; @@ -1552,7 +1543,7 @@ static int fwnet_remove(struct device *_dev) struct fwnet_peer *peer = dev_get_drvdata(_dev); struct fwnet_device *dev = peer->dev; struct net_device *net; - struct fwnet_packet_task *ptask, *pt_next; + int i; mutex_lock(&fwnet_device_mutex); @@ -1570,21 +1561,9 @@ static int fwnet_remove(struct device *_dev) dev->card); fw_iso_context_destroy(dev->broadcast_rcv_context); } - list_for_each_entry_safe(ptask, pt_next, - &dev->packet_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } - list_for_each_entry_safe(ptask, pt_next, - &dev->broadcasted_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } - list_for_each_entry_safe(ptask, pt_next, - &dev->sent_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } + for (i = 0; dev->queued_datagrams && i < 5; i++) + ssleep(1); + WARN_ON(dev->queued_datagrams); list_del(&dev->dev_link); free_netdev(net); From b2268830f5cf29d94b3e4a2af0b795a8f28776fe Mon Sep 17 00:00:00 2001 From: Stefan Richter <stefanr@s5r6.in-berlin.de> Date: Sun, 14 Nov 2010 14:35:40 +0100 Subject: [PATCH 024/179] firewire: net: throttle TX queue before running out of tlabels This prevents firewire-net from submitting write requests in fast succession until failure due to all 64 transaction labels were used up for unfinished split transactions. The netif_stop/wake_queue API is used for this purpose. Without this stop/wake mechanism, datagrams were simply lost whenever the tlabel pool was exhausted. Plus, tlabel exhaustion by firewire-net also prevented other unrelated outbound transactions to be initiated. The chosen queue depth was checked by me to hit the maximum possible throughput with an OS X peer whose receive DMA is good enough to never reject requests due to busy inbound request FIFO. Current Linux peers show a mixed picture of -5%...+15% change in bandwidth; their current bottleneck are RCODE_BUSY situations (fewer or more, depending on TX queue depth) due to too small AR buffer in firewire-ohci. Maxim Levitsky tested this change with similar watermarks with a Linux peer and some pending firewire-ohci improvements that address the RCODE_BUSY problem and confirmed that these TX queue limits are good. Note: This removes some netif_wake_queue from reception code paths. They were apparently copy&paste artefacts from a nonsensical netif_wake_queue use in the older eth1394 driver. This belongs only into the transmit path. Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> Tested-by: Maxim Levitsky <maximlevitsky@gmail.com> --- drivers/firewire/net.c | 59 +++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index d2d488b9cd96..1a467a91fb0b 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -27,8 +27,14 @@ #include <asm/unaligned.h> #include <net/arp.h> -#define FWNET_MAX_FRAGMENTS 25 /* arbitrary limit */ -#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16 * 1024 ? 4 : 2) +/* rx limits */ +#define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ +#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16*1024 ? 4 : 2) + +/* tx limits */ +#define FWNET_MAX_QUEUED_DATAGRAMS 20 /* < 64 = number of tlabels */ +#define FWNET_MIN_QUEUED_DATAGRAMS 10 /* should keep AT DMA busy enough */ +#define FWNET_TX_QUEUE_LEN FWNET_MAX_QUEUED_DATAGRAMS /* ? */ #define IEEE1394_BROADCAST_CHANNEL 31 #define IEEE1394_ALL_NODES (0xffc0 | 0x003f) @@ -640,8 +646,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, net->stats.rx_packets++; net->stats.rx_bytes += skb->len; } - if (netif_queue_stopped(net)) - netif_wake_queue(net); return 0; @@ -650,8 +654,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, net->stats.rx_dropped++; dev_kfree_skb_any(skb); - if (netif_queue_stopped(net)) - netif_wake_queue(net); return -ENOENT; } @@ -783,15 +785,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, * Datagram is not complete, we're done for the * moment. */ - spin_unlock_irqrestore(&dev->lock, flags); - - return 0; + retval = 0; fail: spin_unlock_irqrestore(&dev->lock, flags); - if (netif_queue_stopped(net)) - netif_wake_queue(net); - return retval; } @@ -891,6 +888,13 @@ static void fwnet_free_ptask(struct fwnet_packet_task *ptask) kmem_cache_free(fwnet_packet_task_cache, ptask); } +/* Caller must hold dev->lock. */ +static void dec_queued_datagrams(struct fwnet_device *dev) +{ + if (--dev->queued_datagrams == FWNET_MIN_QUEUED_DATAGRAMS) + netif_wake_queue(dev->netdev); +} + static int fwnet_send_packet(struct fwnet_packet_task *ptask); static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) @@ -907,7 +911,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && ptask->enqueued); if (free) - dev->queued_datagrams--; + dec_queued_datagrams(dev); if (ptask->outstanding_pkts == 0) { dev->netdev->stats.tx_packets++; @@ -978,7 +982,7 @@ static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = ptask->enqueued; if (free) - dev->queued_datagrams--; + dec_queued_datagrams(dev); dev->netdev->stats.tx_dropped++; dev->netdev->stats.tx_errors++; @@ -1063,7 +1067,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) if (!free) ptask->enqueued = true; else - dev->queued_datagrams--; + dec_queued_datagrams(dev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1082,7 +1086,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) if (!free) ptask->enqueued = true; else - dev->queued_datagrams--; + dec_queued_datagrams(dev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1248,6 +1252,15 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) struct fwnet_peer *peer; unsigned long flags; + spin_lock_irqsave(&dev->lock, flags); + + /* Can this happen? */ + if (netif_queue_stopped(dev->netdev)) { + spin_unlock_irqrestore(&dev->lock, flags); + + return NETDEV_TX_BUSY; + } + ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC); if (ptask == NULL) goto fail; @@ -1266,9 +1279,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) proto = hdr_buf.h_proto; dg_size = skb->len; - /* serialize access to peer, including peer->datagram_label */ - spin_lock_irqsave(&dev->lock, flags); - /* * Set the transmission type for the packet. ARP packets and IP * broadcast packets are sent via GASP. @@ -1290,7 +1300,7 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) - goto fail_unlock; + goto fail; generation = peer->generation; dest_node = peer->node_id; @@ -1344,7 +1354,8 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload += RFC2374_FRAG_HDR_SIZE; } - dev->queued_datagrams++; + if (++dev->queued_datagrams == FWNET_MAX_QUEUED_DATAGRAMS) + netif_stop_queue(dev->netdev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1355,9 +1366,9 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; - fail_unlock: - spin_unlock_irqrestore(&dev->lock, flags); fail: + spin_unlock_irqrestore(&dev->lock, flags); + if (ptask) kmem_cache_free(fwnet_packet_task_cache, ptask); @@ -1403,7 +1414,7 @@ static void fwnet_init_dev(struct net_device *net) net->addr_len = FWNET_ALEN; net->hard_header_len = FWNET_HLEN; net->type = ARPHRD_IEEE1394; - net->tx_queue_len = 10; + net->tx_queue_len = FWNET_TX_QUEUE_LEN; } /* caller must hold fwnet_device_mutex */ From bbe425cd9ae83eacd0c9f09df2bf56dc911a54cd Mon Sep 17 00:00:00 2001 From: Jens Axboe <jaxboe@fusionio.com> Date: Wed, 17 Nov 2010 11:56:13 +0100 Subject: [PATCH 025/179] cciss: fix build for PROC_FS disabled The recent patch to fix the removal of a non-existing proc directory introduced this build problem for !CONFIG_PROC_FS: drivers/block/cciss.c:4929: error: 'proc_cciss' undeclared (first use in this function) Fix it by moving proc_cciss outside of the CONFIG_PROC_FS scope. Reported-by: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com> --- drivers/block/cciss.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index a67d0a611a8a..f291587d753e 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26"); MODULE_LICENSE("GPL"); static DEFINE_MUTEX(cciss_mutex); +static struct proc_dir_entry *proc_cciss; #include "cciss_cmd.h" #include "cciss.h" @@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", #define ENG_GIG_FACTOR (ENG_GIG/512) #define ENGAGE_SCSI "engage scsi" -static struct proc_dir_entry *proc_cciss; - static void cciss_seq_show_header(struct seq_file *seq) { ctlr_info_t *h = seq->private; From b432b4b3440a34c1430fcd66bab783640724bd28 Mon Sep 17 00:00:00 2001 From: kishore kadiyala <kishore.kadiyala@ti.com> Date: Wed, 17 Nov 2010 22:35:32 -0500 Subject: [PATCH 026/179] mmc: omap4: hsmmc: Fix improper card detection while booting While booting OMAP4 ES2.0 boards, cards on MMC1 and MMC2 controllers are not getting detected sometimes. During reset of command/data line, wrong pointer to base address was passed while read operation to SYSCTL register, thus impacting the updated reset logic. Passing the correct base address fixes the issue. Signed-off-by: Kishore Kadiyala <kishore.kadiyala@ti.com> Acked-by: Felipe Balbi <balbi@ti.com> Acked-by: Madhusudhan Chikkature <madhu.cr@ti.com> Acked-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/host/omap_hsmmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 82a1079bbdc7..5d46021cbb57 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1002,7 +1002,7 @@ static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host, * Monitor a 0->1 transition first */ if (mmc_slot(host).features & HSMMC_HAS_UPDATED_RESET) { - while ((!(OMAP_HSMMC_READ(host, SYSCTL) & bit)) + while ((!(OMAP_HSMMC_READ(host->base, SYSCTL) & bit)) && (i++ < limit)) cpu_relax(); } From 4d048435e9864998f6a6ad16422393d42322716d Mon Sep 17 00:00:00 2001 From: Magnus Damm <damm@opensource.se> Date: Wed, 17 Nov 2010 11:44:00 +0000 Subject: [PATCH 027/179] ARM: mach-shmobile: sh7372 USB0/IIC1 MSTP fix Fix a MSTP assignment problem in the sh7372 clock framework code. The USB drivers should attach to MSTP322 not MSTP33 where IIC1 is located. Signed-off-by: Magnus Damm <damm@opensource.se> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..d3313a95dbfc 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -647,8 +647,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ - CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP323]), /* USB0 */ - CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP323]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP322]), /* USB0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */ From 0e2af2a9abf94b408ff70679b692a8644fed4aab Mon Sep 17 00:00:00 2001 From: Rakib Mullick <rakib.mullick@gmail.com> Date: Fri, 12 Nov 2010 09:50:54 -0500 Subject: [PATCH 028/179] x86, hw_nmi: Move backtrace_mask declaration under ARCH_HAS_NMI_WATCHDOG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit backtrace_mask has been used under the code context of ARCH_HAS_NMI_WATCHDOG. So put it into that context. We were warned by the following warning: arch/x86/kernel/apic/hw_nmi.c:21: warning: ‘backtrace_mask’ defined but not used Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com> Signed-off-by: Don Zickus <dzickus@redhat.com> LKML-Reference: <1289573455-3410-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/apic/hw_nmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..62f6e1e55b90 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,15 +17,16 @@ #include <linux/nmi.h> #include <linux/module.h> -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } #ifdef ARCH_HAS_NMI_WATCHDOG + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + void arch_trigger_all_cpu_backtrace(void) { int i; From 96e612ffc301372d3a3b94e2cb5d1e0c1c207dd1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Date: Tue, 16 Nov 2010 13:45:16 +0900 Subject: [PATCH 029/179] x86, asm: Fix binutils 2.15 build failure Add parentheses around one pushl_cfi argument. Commit df5d1874 "x86: Use {push,pop}{l,q}_cfi in more places" caused GNU assembler 2.15 (Debian Sarge) to fail. It is still failing as of commit 07bd8516 "x86, asm: Restore parentheses around one pushl_cfi argument". This patch solves build failure with GNU assembler 2.15. Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Acked-by: Jan Beulich <jbeulich@novell.com> Cc: heukelum@fastmail.fm Cc: hpa@linux.intel.com LKML-Reference: <201011160445.oAG4jGif079860@www262.sakura.ne.jp> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 59e175e89599..591e60104278 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -395,7 +395,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax From 9223081f54e3dc5045fe41a475165d9003c9a779 Mon Sep 17 00:00:00 2001 From: Yinghai Lu <yinghai@kernel.org> Date: Sat, 13 Nov 2010 10:52:09 -0800 Subject: [PATCH 030/179] x86: Use online node real index in calulate_tbl_offset() Found a NUMA system that doesn't have RAM installed at the first socket which hangs while executing init scripts. bisected it to: | commit 932967202182743c01a2eee4bdfa2c42697bc586 | Author: Shaohua Li <shaohua.li@intel.com> | Date: Wed Oct 20 11:07:03 2010 +0800 | | x86: Spread tlb flush vector between nodes It turns out when first socket is not online it could have cpus on node1 tlb_offset set to bigger than NUM_INVALIDATE_TLB_VECTORS. That could affect systems like 4 sockets, but socket 2 doesn't have installed, sockets 3 will get too big tlb_offset. Need to use real online node idx. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Acked-by: Shaohua Li <shaohua.li@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> LKML-Reference: <4CDEDE59.40603@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 12cdbb17ad18..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } From 8191c9f69202d4dbc66063cb92059b8a58640d34 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich <sivanich@sgi.com> Date: Tue, 16 Nov 2010 16:23:52 -0600 Subject: [PATCH 031/179] x86: UV: Address interrupt/IO port operation conflict This patch for SGI UV systems addresses a problem whereby interrupt transactions being looped back from a local IOH, through the hub to a local CPU can (erroneously) conflict with IO port operations and other transactions. To workaound this we set a high bit in the APIC IDs used for interrupts. This bit appears to be ignored by the sockets, but it avoids the conflict in the hub. Signed-off-by: Dimitri Sivanich <sivanich@sgi.com> LKML-Reference: <20101116222352.GA8155@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> ___ arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) --- arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index e969f691cbfd..a501741c2335 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -199,6 +199,8 @@ union uvh_apicid { #define UVH_APICID 0x002D0E00L #define UV_APIC_PNODE_SHIFT 6 +#define UV_APICID_HIBIT_MASK 0xffff0000 + /* Local Bus from cpu's perspective */ #define LOCAL_BUS_BASE 0x1c00000 #define LOCAL_BUS_SIZE (4 * 1024 * 1024) @@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } } +extern unsigned int uv_apicid_hibits; static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) { + apicid |= uv_apicid_hibits; return (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 6d90adf4428a..20cafeac7455 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -753,6 +753,23 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; +/* ========================================================================= */ +/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 + +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uvh_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uvh_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea175..c1c52c341f40 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr; static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); +unsigned int uv_apicid_hibits; +EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); static inline bool is_GRU_range(u64 start, u64 end) @@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void) uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; } +/* + * Add an extra bit as dictated by bios to the destination apicid of + * interrupts potentially passing through the UV HUB. This prevents + * a deadlock between interrupts and IO port operations. + */ +static void __init uv_set_apicid_hibit(void) +{ + union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | + UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); + apicid_mask.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; +} + static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int nodeid; @@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) __get_cpu_var(x2apic_extra_bits) = nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; + uv_set_apicid_hibit(); return 1; } } @@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri int pnode; pnode = uv_apicid_to_pnode(phys_apicid); + phys_apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | @@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) int cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; else return BAD_APICID; } @@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a318194002b5..ba9caa808a9c 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector) * the below initialization can't be in firmware because the * messaging IRQ will be determined by the OS */ - apicid = uvhub_to_first_apicid(uvhub); + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, ((apicid << 32) | vector)); } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..9daf5d1af9f1 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu) apicid = cpu_physical_id(cpu); pnode = uv_apicid_to_pnode(apicid); + apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); @@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode) static int uv_setup_intr(int cpu, u64 expires) { u64 val; + unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; int pnode = uv_cpu_to_pnode(cpu); uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, @@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_EVENT_OCCURRED0_RTC1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); + ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); /* Set configuration */ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); From b5482cfa1c95a188b3054fa33274806add91bbe5 Mon Sep 17 00:00:00 2001 From: Alex Shi <alex.shi@intel.com> Date: Tue, 16 Nov 2010 17:34:02 +0800 Subject: [PATCH 032/179] sched: Fix volanomark performance regression Commit fab4762 triggers excessive idle balancing, causing a ~30% loss in volanomark throughput. Remove idle balancing throttle reset. Originally-by: Alex Shi <alex.shi@intel.com> Signed-off-by: Mike Galbraith <efault@gmx.de> Acked-by: Nikhil Rao <ncrao@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1289928732.5169.211.camel@maggy.simson.net> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- kernel/sched_fair.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 52ab113d8bb9..ba0556dc7c06 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1758,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); check_preempt_curr(this_rq, p, 0); - - /* re-arm NEWIDLE balancing when moving tasks */ - src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; - this_rq->idle_stamp = 0; } /* From d5ad140bc1505a98c0f040937125bfcbb508078f Mon Sep 17 00:00:00 2001 From: Nikhil Rao <ncrao@google.com> Date: Wed, 17 Nov 2010 11:42:04 -0800 Subject: [PATCH 033/179] sched: Fix idle balancing An earlier commit reverts idle balancing throttling reset to fix a 30% regression in volanomark throughput. We still need to reset idle_stamp when we pull a task in newidle balance. Reported-by: Alex Shi <alex.shi@intel.com> Signed-off-by: Nikhil Rao <ncrao@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1290022924-3548-1-git-send-email-ncrao@google.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- kernel/sched_fair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ba0556dc7c06..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3215,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) + if (pulled_task) { + this_rq->idle_stamp = 0; break; + } } raw_spin_lock(&this_rq->lock); From 8882135bcd332f294df5455747ea43ba9e6f77ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Tue, 9 Nov 2010 19:01:43 +0100 Subject: [PATCH 034/179] perf: Fix owner-list vs exit Oleg noticed that a perf-fd keeping a reference on the creating task leads to a few funny side effects. There's two different aspects to this: - kernel based perf-events, these should not take out a reference on the creating task and appear on the task's event list since they're not bound to fds nor visible to userspace. - fork() and pthread_create(), these can lead to the creating task dying (and thus the task's event-list becomming useless) but keeping the list and ref alive until the event is closed. Combined they lead to malfunction of the ptrace hw_tracepoints. Cure this by not considering kernel based perf_events for the owner-list and destroying the owner-list when the owner dies. Reported-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Oleg Nesterov <oleg@redhat.com> LKML-Reference: <1289576883.2084.286.camel@laptop> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- kernel/perf_event.c | 63 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f818d9d2dc93..671f6c8c8a32 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2235,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event) raw_spin_unlock_irq(&ctx->lock); mutex_unlock(&ctx->mutex); - mutex_lock(&event->owner->perf_event_mutex); - list_del_init(&event->owner_entry); - mutex_unlock(&event->owner->perf_event_mutex); - put_task_struct(event->owner); - free_event(event); return 0; @@ -2252,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); static int perf_release(struct inode *inode, struct file *file) { struct perf_event *event = file->private_data; + struct task_struct *owner; file->private_data = NULL; + rcu_read_lock(); + owner = ACCESS_ONCE(event->owner); + /* + * Matches the smp_wmb() in perf_event_exit_task(). If we observe + * !owner it means the list deletion is complete and we can indeed + * free this event, otherwise we need to serialize on + * owner->perf_event_mutex. + */ + smp_read_barrier_depends(); + if (owner) { + /* + * Since delayed_put_task_struct() also drops the last + * task reference we can safely take a new reference + * while holding the rcu_read_lock(). + */ + get_task_struct(owner); + } + rcu_read_unlock(); + + if (owner) { + mutex_lock(&owner->perf_event_mutex); + /* + * We have to re-check the event->owner field, if it is cleared + * we raced with perf_event_exit_task(), acquiring the mutex + * ensured they're done, and we can proceed with freeing the + * event. + */ + if (event->owner) + list_del_init(&event->owner_entry); + mutex_unlock(&owner->perf_event_mutex); + put_task_struct(owner); + } + return perf_event_release_kernel(event); } @@ -5678,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(&ctx->mutex); event->owner = current; - get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); @@ -5746,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ++ctx->generation; mutex_unlock(&ctx->mutex); - event->owner = current; - get_task_struct(current); - mutex_lock(¤t->perf_event_mutex); - list_add_tail(&event->owner_entry, ¤t->perf_event_list); - mutex_unlock(¤t->perf_event_mutex); - return event; err_free: @@ -5902,8 +5925,24 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) */ void perf_event_exit_task(struct task_struct *child) { + struct perf_event *event, *tmp; int ctxn; + mutex_lock(&child->perf_event_mutex); + list_for_each_entry_safe(event, tmp, &child->perf_event_list, + owner_entry) { + list_del_init(&event->owner_entry); + + /* + * Ensure the list deletion is visible before we clear + * the owner, closes a race against perf_release() where + * we need to serialize on the owner->perf_event_mutex. + */ + smp_wmb(); + event->owner = NULL; + } + mutex_unlock(&child->perf_event_mutex); + for_each_task_context_nr(ctxn) perf_event_exit_task_context(child, ctxn); } From 94e8ba728640dc01375a14e337f3b892bfacbeeb Mon Sep 17 00:00:00 2001 From: Sergio Aguirre <saaguirre@ti.com> Date: Tue, 16 Nov 2010 12:02:47 -0600 Subject: [PATCH 035/179] irq_work: Drop cmpxchg() result The compiler warned us about: kernel/irq_work.c: In function 'irq_work_run': kernel/irq_work.c:148: warning: value computed is not used Dropping the cmpxchg() result is indeed weird, but correct - so annotate away the warning. Signed-off-by: Sergio Aguirre <saaguirre@ti.com> Cc: Huang Ying <ying.huang@intel.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1289930567-17828-1-git-send-email-saaguirre@ti.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- kernel/irq_work.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff8481..90f881904bb1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -145,7 +145,9 @@ void irq_work_run(void) * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); + (void)cmpxchg(&entry->next, + next_flags(NULL, IRQ_WORK_BUSY), + NULL); } } EXPORT_SYMBOL_GPL(irq_work_run); From de31ec8a31046111befd16a7083e3bdda2ff42cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Date: Thu, 18 Nov 2010 19:16:55 +0900 Subject: [PATCH 036/179] x86/kprobes: Prevent kprobes to probe on save_args() Prevent kprobes to probe on save_args() since this function will be called from breakpoint exception handler. That will cause infinit loop on breakpoint handling. Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> LKML-Reference: <20101118101655.2779.2816.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe2690d71c0c..e3ba417e8697 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64) .endm /* save partial stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_args) XCPT_FRAME cld @@ -334,6 +335,7 @@ ENTRY(save_args) ret CFI_ENDPROC END(save_args) + .popsection ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 From 37db6c8f1d0c4b8f01dc049f3a893b725288660f Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@novell.com> Date: Tue, 16 Nov 2010 08:25:08 +0000 Subject: [PATCH 037/179] x86-64: Fix and clean up AMD Fam10 MMCONF enabling Candidate memory ranges were not calculated properly (start addresses got needlessly rounded down, and end addresses didn't get rounded up at all), address comparison for secondary CPUs was done on only part of the address, and disabled status wasn't tracked properly. Signed-off-by: Jan Beulich <jbeulich@novell.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com> LKML-Reference: <4CE24DF40200007800022737@vpn.id2.novell.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/mmconf-fam10h_64.c | 64 ++++++++++++++---------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ea3dc487047..6b89f5e86021 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,7 +128,7 @@ #define FAM10H_MMIO_CONF_ENABLE (1<<0) #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 6da143c2a6b8..ac861b8348e2 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -25,7 +25,6 @@ struct pci_hostbridge_probe { }; static u64 __cpuinitdata fam10h_pci_mmconf_base; -static int __cpuinitdata fam10h_pci_mmconf_base_status; static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, @@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2) return start1 - start2; } -/*[47:0] */ -/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) +#define MMCONF_MASK (~(MMCONF_UNIT - 1)) +#define MMCONF_SIZE (MMCONF_UNIT << 8) +/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) -#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) static void __cpuinit get_fam10h_pci_mmconf_base(void) { int i; @@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) struct range range[8]; /* only try to get setting from BSP */ - /* -1 or 1 */ - if (fam10h_pci_mmconf_base_status) + if (fam10h_pci_mmconf_base) return; if (!early_pci_allowed()) - goto fail; + return; found = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { @@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) } if (!found) - goto fail; + return; /* SYS_CFG */ address = MSR_K8_SYSCFG; @@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) /* TOP_MEM2 is not enabled? */ if (!(val & (1<<21))) { - tom2 = 0; + tom2 = 1ULL << 32; } else { /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - tom2 = val & (0xffffULL<<32); + tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); } if (base <= tom2) - base = tom2 + (1ULL<<32); + base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; /* * need to check if the range is in the high mmio range that is @@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (!(reg & 3)) continue; - start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); - end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ - if (!end) + if (end < tom2) continue; range[hi_mmio_num].start = start; @@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (range[hi_mmio_num - 1].end < base) goto out; - if (range[0].start > base) + if (range[0].start > base + MMCONF_SIZE) goto out; /* need to find one window */ - base = range[0].start - (1ULL << 32); + base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; if ((base > tom2) && BASE_VALID(base)) goto out; - base = range[hi_mmio_num - 1].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) + base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; + if (BASE_VALID(base)) goto out; /* need to find window between ranges */ - if (hi_mmio_num > 1) - for (i = 0; i < hi_mmio_num - 1; i++) { - if (range[i + 1].start > (range[i].end + (1ULL << 32))) { - base = range[i].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) - goto out; - } + for (i = 1; i < hi_mmio_num; i++) { + base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; + val = range[i].start & MMCONF_MASK; + if (val >= base + MMCONF_SIZE && BASE_VALID(base)) + goto out; } - -fail: - fam10h_pci_mmconf_base_status = -1; return; + out: fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; } void __cpuinit fam10h_check_enable_mmcfg(void) @@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) /* only trust the one handle 256 buses, if acpi=off */ if (!acpi_pci_disabled || busnbits >= 8) { - u64 base; - base = val & (0xffffULL << 32); - if (fam10h_pci_mmconf_base_status <= 0) { + u64 base = val & MMCONF_MASK; + + if (!fam10h_pci_mmconf_base) { fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; return; } else if (fam10h_pci_mmconf_base == base) return; @@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) * with 256 buses */ get_fam10h_pci_mmconf_base(); - if (fam10h_pci_mmconf_base_status <= 0) + if (!fam10h_pci_mmconf_base) { + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; return; + } printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | From 14870b457524e745f1a118e17873d104b1a47b70 Mon Sep 17 00:00:00 2001 From: Abhijith Das <adas@redhat.com> Date: Thu, 18 Nov 2010 11:24:24 -0500 Subject: [PATCH 038/179] GFS2: Userland expects quota limit/warn/usage in 512b blocks Userland programs using the quotactl() syscall assume limit/warn/usage block counts in 512b basic blocks which were instead being read/written in fs blocksize in gfs2. With this patch, gfs2 correctly interacts with the syscall using 512b blocks. Signed-off-by: Abhi Das <adas@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> --- fs/gfs2/quota.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58a9b9998b42..f606baf9ba72 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); @@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qd->qd_qb.qb_value = qp->qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_warn = qp->qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_limit = qp->qu_limit; } } @@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, fdq->d_version = FS_DQUOT_VERSION; fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; fdq->d_id = id; - fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); - fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); - fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; + fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; gfs2_glock_dq_uninit(&q_gh); out: @@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, /* If nothing has changed, this is a no-op */ if ((fdq->d_fieldmask & FS_DQ_BSOFT) && - (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) fdq->d_fieldmask ^= FS_DQ_BSOFT; if ((fdq->d_fieldmask & FS_DQ_BHARD) && - (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) fdq->d_fieldmask ^= FS_DQ_BHARD; if (fdq->d_fieldmask == 0) goto out_i; From 1b1d76e2df2a0aa965d6a227db7061d3fff029b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <error27@gmail.com> Date: Thu, 18 Nov 2010 06:58:04 +0300 Subject: [PATCH 039/179] UBI: release locks in check_corruption Commit 45aafd32996e27 "UBI: tighten the corrupted PEB criteria" introduced some return paths that didn't release the ubi->buf_mutex Signed-off-by: Dan Carpenter <error27@gmail.com> Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> --- drivers/mtd/ubi/scan.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 3c631863bf40..204345be8e62 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -787,16 +787,15 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, * erased, so it became unstable and corrupted, and should be * erased. */ - return 0; + err = 0; + goto out_unlock; } if (err) - return err; + goto out_unlock; - if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { - mutex_unlock(&ubi->buf_mutex); - return 0; - } + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) + goto out_unlock; ubi_err("PEB %d contains corrupted VID header, and the data does not " "contain all 0xFF, this may be a non-UBI PEB or a severe VID " @@ -806,8 +805,11 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, pnum, ubi->leb_start, ubi->leb_size); ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ubi->peb_buf1, ubi->leb_size, 1); + err = 1; + +out_unlock: mutex_unlock(&ubi->buf_mutex); - return 1; + return err; } /** From 4d0812c37f2f6cf6fc7ca086b5a5e572cbbe7f6d Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen <ohad@wizery.com> Date: Sun, 14 Nov 2010 12:40:33 +0200 Subject: [PATCH 040/179] mmc: sdio: fix nasty oops in mmc_sdio_detect Power off the card in mmc_sdio_detect __before__ a potential error handler, which completely removes the card, executes, and only if the card was successfully powered on beforehand. While we're at it, use the _sync variant of the runtime PM put API, in order to ensure that the card is left powered off in case an error occurred, and the card is going to be removed. Reproduced and tested on the OLPC XO-1.5. Reported-by: Daniel Drake <dsd@laptop.org> Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/core/sdio.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index c3ad1058cd31..42a949b723b8 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -560,6 +560,19 @@ static void mmc_sdio_detect(struct mmc_host *host) mmc_release_host(host); + /* + * Tell PM core it's OK to power off the card now. + * + * The _sync variant is used in order to ensure that the card + * is left powered off in case an error occurred, and the card + * is going to be removed. + * + * Since there is no specific reason to believe a new user + * is about to show up at this point, the _sync variant is + * desirable anyway. + */ + pm_runtime_put_sync(&host->card->dev); + out: if (err) { mmc_sdio_remove(host); @@ -568,9 +581,6 @@ static void mmc_sdio_detect(struct mmc_host *host) mmc_detach_bus(host); mmc_release_host(host); } - - /* Tell PM core that we're done */ - pm_runtime_put(&host->card->dev); } /* From ed919b0125b26dcc052e44836f66e7e1f5c49c7e Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen <ohad@wizery.com> Date: Fri, 19 Nov 2010 09:29:09 +0200 Subject: [PATCH 041/179] mmc: sdio: fix runtime PM anomalies by introducing MMC_CAP_POWER_OFF_CARD Some board/card/host configurations are not capable of powering off the card after boot. To support such configurations, and to allow smoother transition to runtime PM behavior, MMC_CAP_POWER_OFF_CARD is added, so hosts need to explicitly indicate whether it's OK to power off their cards after boot. SDIO core will enable runtime PM for a card only if that cap is set. As a result, the card will be powered down after boot, and will only be powered up again when a driver is loaded (and then it's up to the driver to decide whether power will be kept or not). This will prevent sdio_bus_probe() failures with setups that do not support powering off the card. Reported-and-tested-by: Daniel Drake <dsd@laptop.org> Reported-and-tested-by: Arnd Hannemann <arnd@arndnet.de> Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- drivers/mmc/core/sdio.c | 37 +++++++++++++++++++++++-------------- drivers/mmc/core/sdio_bus.c | 33 ++++++++++++++++++++++----------- include/linux/mmc/host.h | 1 + 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 42a949b723b8..efef5f94ac42 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -547,9 +547,11 @@ static void mmc_sdio_detect(struct mmc_host *host) BUG_ON(!host->card); /* Make sure card is powered before detecting it */ - err = pm_runtime_get_sync(&host->card->dev); - if (err < 0) - goto out; + if (host->caps & MMC_CAP_POWER_OFF_CARD) { + err = pm_runtime_get_sync(&host->card->dev); + if (err < 0) + goto out; + } mmc_claim_host(host); @@ -571,7 +573,8 @@ static void mmc_sdio_detect(struct mmc_host *host) * is about to show up at this point, the _sync variant is * desirable anyway. */ - pm_runtime_put_sync(&host->card->dev); + if (host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_sync(&host->card->dev); out: if (err) { @@ -728,16 +731,21 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr) card = host->card; /* - * Let runtime PM core know our card is active + * Enable runtime PM only if supported by host+card+board */ - err = pm_runtime_set_active(&card->dev); - if (err) - goto remove; + if (host->caps & MMC_CAP_POWER_OFF_CARD) { + /* + * Let runtime PM core know our card is active + */ + err = pm_runtime_set_active(&card->dev); + if (err) + goto remove; - /* - * Enable runtime PM for this card - */ - pm_runtime_enable(&card->dev); + /* + * Enable runtime PM for this card + */ + pm_runtime_enable(&card->dev); + } /* * The number of functions on the card is encoded inside @@ -755,9 +763,10 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr) goto remove; /* - * Enable Runtime PM for this func + * Enable Runtime PM for this func (if supported) */ - pm_runtime_enable(&card->sdio_func[i]->dev); + if (host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_enable(&card->sdio_func[i]->dev); } mmc_release_host(host); diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 2716c7ab6bbf..203da443e339 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -17,6 +17,7 @@ #include <linux/pm_runtime.h> #include <linux/mmc/card.h> +#include <linux/mmc/host.h> #include <linux/mmc/sdio_func.h> #include "sdio_cis.h" @@ -132,9 +133,11 @@ static int sdio_bus_probe(struct device *dev) * it should call pm_runtime_put_noidle() in its probe routine and * pm_runtime_get_noresume() in its remove routine. */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto out; + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto out; + } /* Set the default block size so the driver is sure it's something * sensible. */ @@ -151,7 +154,8 @@ static int sdio_bus_probe(struct device *dev) return 0; disable_runtimepm: - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); out: return ret; } @@ -160,12 +164,14 @@ static int sdio_bus_remove(struct device *dev) { struct sdio_driver *drv = to_sdio_driver(dev->driver); struct sdio_func *func = dev_to_sdio_func(dev); - int ret; + int ret = 0; /* Make sure card is powered before invoking ->remove() */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto out; + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto out; + } drv->remove(func); @@ -178,10 +184,12 @@ static int sdio_bus_remove(struct device *dev) } /* First, undo the increment made directly above */ - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); /* Then undo the runtime PM settings in sdio_bus_probe() */ - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); out: return ret; @@ -191,6 +199,8 @@ static int sdio_bus_remove(struct device *dev) static int sdio_bus_pm_prepare(struct device *dev) { + struct sdio_func *func = dev_to_sdio_func(dev); + /* * Resume an SDIO device which was suspended at run time at this * point, in order to allow standard SDIO suspend/resume paths @@ -212,7 +222,8 @@ static int sdio_bus_pm_prepare(struct device *dev) * since there is little point in failing system suspend if a * device can't be resumed. */ - pm_runtime_resume(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_resume(dev); return 0; } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6d87f68ce4b6..30f6fad99a58 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -168,6 +168,7 @@ struct mmc_host { /* DDR mode at 1.8V */ #define MMC_CAP_1_2V_DDR (1 << 12) /* can support */ /* DDR mode at 1.2V */ +#define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ mmc_pm_flag_t pm_caps; /* supported pm features */ From 784b4e29a26617589edd290dd2919735e190c06e Mon Sep 17 00:00:00 2001 From: Chris Mason <chris.mason@oracle.com> Date: Sun, 21 Nov 2010 22:20:49 -0500 Subject: [PATCH 042/179] Btrfs: add migrate page for metadata inode Migrate page will directly call the btrfs btree writepage function, which isn't actually allowed. Our writepage assumes that you have locked the extent_buffer and flagged the block as written. Without doing these steps, we can corrupt metadata blocks. A later commit will remove the btree writepage function since it is really only safely used internally by btrfs. We use writepages for everything else. Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/disk-io.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b40dfe48017b..a67b98d58c2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include <linux/freezer.h> #include <linux/crc32c.h> #include <linux/slab.h> +#include <linux/migrate.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); + WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); + found_start = btrfs_header_bytenr(eb); if (found_start != start) { WARN_ON(1); @@ -693,6 +696,26 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } +static int btree_migratepage(struct address_space *mapping, + struct page *newpage, struct page *page) +{ + /* + * we can't safely write a btree page from here, + * we haven't done the locking hook + */ + if (PageDirty(page)) + return -EAGAIN; + /* + * Buffers may be managed in a filesystem specific way. + * We must have no buffers or drop them. + */ + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; + + return migrate_page(mapping, newpage, page); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -707,8 +730,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } redirty_page_for_writepage(wbc, page); - eb = btrfs_find_tree_block(root, page_offset(page), - PAGE_CACHE_SIZE); + eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); WARN_ON(!eb); was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); @@ -799,6 +821,7 @@ static const struct address_space_operations btree_aops = { .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, + .migratepage = btree_migratepage, }; int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, From 0c56fa9662927354255f2f64617d1de61fc03db9 Mon Sep 17 00:00:00 2001 From: Miao Xie <miaox@cn.fujitsu.com> Date: Mon, 22 Nov 2010 03:01:39 +0000 Subject: [PATCH 043/179] btrfs: fix free dip and dip->csums twice bio_endio() will free dip and dip->csums, so dip and dip->csums twice will be freed twice. Fix it. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5132c9af888a..8c027aa0020a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5731,7 +5731,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); if (ret) - goto out_err; + goto free_ordered; if (write && !skip_sum) { ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, @@ -5740,7 +5740,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); if (ret) - goto out_err; + goto free_ordered; return; } else if (!skip_sum) btrfs_lookup_bio_sums_dio(root, inode, bio, @@ -5748,11 +5748,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = btrfs_map_bio(root, rw, bio, 0, 1); if (ret) - goto out_err; + goto free_ordered; return; -out_err: - kfree(dip->csums); - kfree(dip); free_ordered: /* * If this is a write, we need to clean up the reserved space and kill From 88f794ede7fadd4b63135b94d1561c1f2d5eb5f5 Mon Sep 17 00:00:00 2001 From: Miao Xie <miaox@cn.fujitsu.com> Date: Mon, 22 Nov 2010 03:02:55 +0000 Subject: [PATCH 044/179] btrfs: cleanup duplicate bio allocating functions extent_bio_alloc() and compressed_bio_alloc() are similar, cleanup similar source code. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/compression.c | 15 +-------------- fs/btrfs/extent_io.c | 8 ++++---- fs/btrfs/extent_io.h | 3 +++ 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7845d1f7d1d9..b50bc4bd5c56 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, static struct bio *compressed_bio_alloc(struct block_device *bdev, u64 first_byte, gfp_t gfp_flags) { - struct bio *bio; int nr_vecs; nr_vecs = bio_get_nr_vecs(bdev); - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_size = 0; - bio->bi_bdev = bdev; - bio->bi_sector = first_byte >> 9; - } - return bio; + return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); } static int check_compressed_csum(struct inode *inode, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3b7eaee0f912..f60aa3c35c23 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) bio_put(bio); } -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) { struct bio *bio; @@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, else nr = bio_get_nr_vecs(bdev); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1c6d4f342ef7..4183c8178f01 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, unsigned long op); +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags); #endif From e65e1535542931e51189832264cd282e5899e4b9 Mon Sep 17 00:00:00 2001 From: Miao Xie <miaox@cn.fujitsu.com> Date: Mon, 22 Nov 2010 03:04:43 +0000 Subject: [PATCH 045/179] btrfs: fix panic caused by direct IO btrfs paniced when we write >64KB data by direct IO at one time. Reproduce steps: # mkfs.btrfs /dev/sda5 /dev/sda6 # mount /dev/sda5 /mnt # dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct Then btrfs paniced: mapping failed logical 1103155200 bio len 69632 len 12288 ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:3010! [SNIP] Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY RIP: 0010:[<ffffffffa03d1462>] [<ffffffffa03d1462>] btrfs_map_bio+0x202/0x210 [btrfs] [SNIP] Call Trace: [<ffffffffa03ab3eb>] __btrfs_submit_bio_done+0x1b/0x20 [btrfs] [<ffffffffa03a35ff>] run_one_async_done+0x9f/0xb0 [btrfs] [<ffffffffa03d3d20>] run_ordered_completions+0x80/0xc0 [btrfs] [<ffffffffa03d45a4>] worker_loop+0x154/0x5f0 [btrfs] [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs] [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs] [<ffffffff81083216>] kthread+0x96/0xa0 [<ffffffff8100cec4>] kernel_thread_helper+0x4/0x10 [<ffffffff81083180>] ? kthread+0x0/0xa0 [<ffffffff8100cec0>] ? kernel_thread_helper+0x0/0x10 We fix this problem by splitting bios when we submit bios. Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 205 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 184 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8c027aa0020a..a47e4faa8c46 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5535,13 +5535,21 @@ struct btrfs_dio_private { u64 bytes; u32 *csums; void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + struct bio *orig_bio; }; static void btrfs_endio_direct_read(struct bio *bio, int err) { + struct btrfs_dio_private *dip = bio->bi_private; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; - struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; @@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, return 0; } +static void btrfs_end_dio_bio(struct bio *bio, int err) +{ + struct btrfs_dio_private *dip = bio->bi_private; + + if (err) { + printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " + "disk_bytenr %lu len %u err no %d\n", + dip->inode->i_ino, bio->bi_rw, bio->bi_sector, + bio->bi_size, err); + dip->errors = 1; + + /* + * before atomic variable goto zero, we must make sure + * dip->errors is perceived to be set. + */ + smp_mb__before_atomic_dec(); + } + + /* if there are more bios still pending for this dio, just exit */ + if (!atomic_dec_and_test(&dip->pending_bios)) + goto out; + + if (dip->errors) + bio_io_error(dip->orig_bio); + else { + set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); + bio_endio(dip->orig_bio, 0); + } +out: + bio_put(bio); +} + +static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, + u64 first_sector, gfp_t gfp_flags) +{ + int nr_vecs = bio_get_nr_vecs(bdev); + return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); +} + +static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, + int rw, u64 file_offset, int skip_sum, + u32 *csums) +{ + int write = rw & REQ_WRITE; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + bio_get(bio); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto err; + + if (write && !skip_sum) { + ret = btrfs_wq_submit_bio(root->fs_info, + inode, rw, bio, 0, 0, + file_offset, + __btrfs_submit_bio_start_direct_io, + __btrfs_submit_bio_done); + goto err; + } else if (!skip_sum) + btrfs_lookup_bio_sums_dio(root, inode, bio, + file_offset, csums); + + ret = btrfs_map_bio(root, rw, bio, 0, 1); +err: + bio_put(bio); + return ret; +} + +static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, + int skip_sum) +{ + struct inode *inode = dip->inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct bio *bio; + struct bio *orig_bio = dip->orig_bio; + struct bio_vec *bvec = orig_bio->bi_io_vec; + u64 start_sector = orig_bio->bi_sector; + u64 file_offset = dip->logical_offset; + u64 submit_len = 0; + u64 map_length; + int nr_pages = 0; + u32 *csums = dip->csums; + int ret = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); + if (!bio) + return -ENOMEM; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + atomic_inc(&dip->pending_bios); + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + return -EIO; + } + + while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { + if (unlikely(map_length < submit_len + bvec->bv_len || + bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset) < bvec->bv_len)) { + /* + * inc the count before we submit the bio so + * we know the end IO handler won't happen before + * we inc the count. Otherwise, the dip might get freed + * before we're done setting it up + */ + atomic_inc(&dip->pending_bios); + ret = __btrfs_submit_dio_bio(bio, inode, rw, + file_offset, skip_sum, + csums); + if (ret) { + bio_put(bio); + atomic_dec(&dip->pending_bios); + goto out_err; + } + + if (!skip_sum) + csums = csums + nr_pages; + start_sector += submit_len >> 9; + file_offset += submit_len; + + submit_len = 0; + nr_pages = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, + start_sector, GFP_NOFS); + if (!bio) + goto out_err; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + goto out_err; + } + } else { + submit_len += bvec->bv_len; + nr_pages ++; + bvec++; + } + } + + ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, + csums); + if (!ret) + return 0; + + bio_put(bio); +out_err: + dip->errors = 1; + /* + * before atomic variable goto zero, we must + * make sure dip->errors is perceived to be set. + */ + smp_mb__before_atomic_dec(); + if (atomic_dec_and_test(&dip->pending_bios)) + bio_io_error(dip->orig_bio); + + /* bio_end_io() will handle error, so we needn't return it */ + return 0; +} + static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, loff_t file_offset) { @@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, dip->disk_bytenr = (u64)bio->bi_sector << 9; bio->bi_private = dip; + dip->errors = 0; + dip->orig_bio = bio; + atomic_set(&dip->pending_bios, 0); if (write) bio->bi_end_io = btrfs_endio_direct_write; else bio->bi_end_io = btrfs_endio_direct_read; - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - if (ret) - goto free_ordered; - - if (write && !skip_sum) { - ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, 0, 0, - dip->logical_offset, - __btrfs_submit_bio_start_direct_io, - __btrfs_submit_bio_done); - if (ret) - goto free_ordered; + ret = btrfs_submit_direct_hook(rw, dip, skip_sum); + if (!ret) return; - } else if (!skip_sum) - btrfs_lookup_bio_sums_dio(root, inode, bio, - dip->logical_offset, dip->csums); - - ret = btrfs_map_bio(root, rw, bio, 0, 1); - if (ret) - goto free_ordered; - return; free_ordered: /* * If this is a write, we need to clean up the reserved space and kill From 6f33434850ed87dc5e56b60ebbad3d3cf405f296 Mon Sep 17 00:00:00 2001 From: Arne Jansen <sensille@gmx.net> Date: Fri, 12 Nov 2010 23:17:56 +0000 Subject: [PATCH 046/179] btrfs: Fix early enospc because 'unused' calculated with wrong sign. 'unused' calculated with wrong sign in reserve_metadata_bytes(). This might have lead to unwanted over-reservations. Signed-off-by: Arne Jansen <sensille@gmx.net> Reviewed-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a541bc87f04c..ddaf6340fe7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3413,7 +3413,7 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, * our reservation. */ if (unused <= space_info->total_bytes) { - unused -= space_info->total_bytes; + unused = space_info->total_bytes - unused; if (unused >= num_bytes) { if (!reserved) space_info->bytes_reserved += orig_bytes; From 0de90876c6cb774d4a424dafc1fc9ec50071b81b Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Fri, 19 Nov 2010 13:40:41 +0000 Subject: [PATCH 047/179] Btrfs: handle the space_cache option properly When I added the clear_cache option I screwed up and took the break out of the space_cache case statement, so whenever you mount with space_cache you also get clear_cache, which does you no good if you say set space_cache in fstab so it always gets set. This patch adds the break back in properly. Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 718b10de2049..66e4612a7916 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_clear_cache: printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); From 2a6b8daedaf3682bed3fc1d4e2390491f6e19c49 Mon Sep 17 00:00:00 2001 From: Li Zefan <lizf@cn.fujitsu.com> Date: Fri, 19 Nov 2010 01:36:10 +0000 Subject: [PATCH 048/179] btrfs: Check if dest_offset is block-size aligned before cloning file We've done the check for src_offset and src_length, and We should also check dest_offset, otherwise we'll corrupt the destination file: (After cloning file1 to file2 with unaligned dest_offset) # cat /mnt/file2 cat: /mnt/file2: Input/output error Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/ioctl.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 463d91b4dd3a..81b47bd8a55a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1669,12 +1669,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, olen = len = src->i_size - off; /* if we extend to eof, continue to block boundary */ if (off + len == src->i_size) - len = ((src->i_size + bs-1) & ~(bs-1)) - - off; + len = ALIGN(src->i_size, bs) - off; /* verify the end result is block aligned */ - if ((off & (bs-1)) || - ((off + len) & (bs-1))) + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) goto out_unlock; /* do any pending delalloc/csum calc on src, one way or From 5f3888ff6f0b9dce60705765752b788a92557644 Mon Sep 17 00:00:00 2001 From: Li Zefan <lizf@cn.fujitsu.com> Date: Fri, 19 Nov 2010 01:36:34 +0000 Subject: [PATCH 049/179] btrfs: Set file size correctly in file clone Set src_offset = 0, src_length = 20K, dest_offset = 20K. And the original filesize of the dest file 'file2' is 30K: # ls -l /mnt/file2 -rw-r--r-- 1 root root 30720 Nov 18 16:42 /mnt/file2 Now clone file1 to file2, the dest file should be 40K, but it still shows 30K: # ls -l /mnt/file2 -rw-r--r-- 1 root root 30720 Nov 18 16:42 /mnt/file2 Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 81b47bd8a55a..6b4bfa72bf8d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1873,8 +1873,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * but shouldn't round up the file size */ endoff = new_key.offset + datal; - if (endoff > off+olen) - endoff = off+olen; + if (endoff > destoff+olen) + endoff = destoff+olen; if (endoff > inode->i_size) btrfs_i_size_write(inode, endoff); From f209561ad83c5ffd561dc4bc3a3c90b704fe9231 Mon Sep 17 00:00:00 2001 From: Li Zefan <lizf@cn.fujitsu.com> Date: Fri, 19 Nov 2010 02:05:24 +0000 Subject: [PATCH 050/179] btrfs: Show device attr correctly for symlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symlinks and files of other types show different device numbers, though they are on the same partition: $ touch tmp; ln -s tmp tmp2; stat tmp tmp2 File: `tmp' Size: 0 Blocks: 0 IO Block: 4096 regular empty file Device: 15h/21d Inode: 984027 Links: 1 --- snip --- File: `tmp2' -> `tmp' Size: 3 Blocks: 0 IO Block: 4096 symbolic link Device: 13h/19d Inode: 984028 Links: 1 Reported-by: Toke Høiland-Jørgensen <toke@toke.dk> Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a47e4faa8c46..eed357ff6c99 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7299,6 +7299,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = btrfs_getattr, .permission = btrfs_permission, .setxattr = btrfs_setxattr, .getxattr = btrfs_getxattr, From 0410c94aff109c02b6774a0ed00114987cda7ce5 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski <mk@lab.zgora.pl> Date: Sat, 20 Nov 2010 12:03:07 +0000 Subject: [PATCH 051/179] btrfs: make 1-bit signed fileds unsigned Fixes these sparse warnings: fs/btrfs/ctree.h:811:17: error: dubious one-bit signed bitfield fs/btrfs/ctree.h:812:20: error: dubious one-bit signed bitfield fs/btrfs/ctree.h:813:19: error: dubious one-bit signed bitfield Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/ctree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8db9234f6b41..af52f6d7a4d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -808,9 +808,9 @@ struct btrfs_block_group_cache { int extents_thresh; int free_extents; int total_bitmaps; - int ro:1; - int dirty:1; - int iref:1; + unsigned int ro:1; + unsigned int dirty:1; + unsigned int iref:1; int disk_cache_state; From 2ede0daf01549cecf4bb0962c46dc47382047523 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Wed, 17 Nov 2010 18:54:54 +0000 Subject: [PATCH 052/179] Btrfs: handle NFS lookups properly People kept reporting NFS issues, specifically getting ESTALE alot. I figured out how to reproduce the problem SERVER mkfs.btrfs /dev/sda1 mount /dev/sda1 /mnt/btrfs-test <add /mnt/btrfs-test to /etc/exports> btrfs subvol create /mnt/btrfs-test/foo service nfs start CLIENT mount server:/mnt/btrfs /mnt/test cd /mnt/test/foo ls SERVER echo 3 > /proc/sys/vm/drop_caches CLIENT ls <-- get an ESTALE here This is because the standard way to lookup a name in nfsd is to use readdir, and what it does is do a readdir on the parent directory looking for the inode of the child. So in this case the parent being / and the child being foo. Well subvols all have the same inode number, so doing a readdir of / looking for inode 256 will return '.', which obviously doesn't match foo. So instead we need to have our own .get_name so that we can find the right name. Our .get_name will either lookup the inode backref or the root backref, whichever we're looking for, and return the name we find. Running the above reproducer with this patch results in everything acting the way its supposed to. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/export.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 951ef09b82f4..6f0444473594 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -232,9 +232,85 @@ static struct dentry *btrfs_get_parent(struct dentry *child) return ERR_PTR(ret); } +static int btrfs_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *inode = child->d_inode; + struct inode *dir = parent->d_inode; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_inode_ref *iref; + struct btrfs_root_ref *rref; + struct extent_buffer *leaf; + unsigned long name_ptr; + struct btrfs_key key; + int name_len; + int ret; + + if (!dir || !inode) + return -EINVAL; + + if (!S_ISDIR(dir->i_mode)) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = BTRFS_I(inode)->root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + root = root->fs_info->tree_root; + } else { + key.objectid = inode->i_ino; + key.offset = dir->i_ino; + key.type = BTRFS_INODE_REF_KEY; + } + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } else if (ret > 0) { + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + path->slots[0]--; + } else { + btrfs_free_path(path); + return -ENOENT; + } + } + leaf = path->nodes[0]; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + rref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + name_ptr = (unsigned long)(rref + 1); + name_len = btrfs_root_ref_name_len(leaf, rref); + } else { + iref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_ref); + name_ptr = (unsigned long)(iref + 1); + name_len = btrfs_inode_ref_name_len(leaf, iref); + } + + read_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_free_path(path); + + /* + * have to add the null termination to make sure that reconnect_path + * gets the right len for strlen + */ + name[name_len] = '\0'; + + return 0; +} + const struct export_operations btrfs_export_ops = { .encode_fh = btrfs_encode_fh, .fh_to_dentry = btrfs_fh_to_dentry, .fh_to_parent = btrfs_fh_to_parent, .get_parent = btrfs_get_parent, + .get_name = btrfs_get_name, }; From 76195853903ca613ba722203db9b747d70478fc7 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Fri, 19 Nov 2010 02:18:02 +0000 Subject: [PATCH 053/179] Btrfs: fix more ESTALE problems with NFS When creating new inodes we don't setup inode->i_generation. So if we generate an fh with a newly created inode we save the generation of 0, but if we flush the inode to disk and have to read it back when getting the inode on the server we'll have the right i_generation, so gens wont match and we get ESTALE. This patch properly sets inode->i_generation when we create the new inode and now I'm no longer getting ESTALE. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eed357ff6c99..fc22f556aa24 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + inode->i_generation = BTRFS_I(inode)->generation; btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) From 6a912213046ecb6511fdf35531a0c7de3de963c9 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Sat, 20 Nov 2010 09:48:00 +0000 Subject: [PATCH 054/179] Btrfs: use dget_parent where we can UPDATED There are lots of places where we do dentry->d_parent->d_inode without holding the dentry->d_lock. This could cause problems with rename. So instead we need to use dget_parent() and hold the reference to the parent as long as we are going to use it's inode and then dput it at the end. Signed-off-by: Josef Bacik <josef@redhat.com> Cc: raven@themaw.net Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 9 ++++++--- fs/btrfs/ioctl.c | 20 ++++++++++++++++---- fs/btrfs/transaction.c | 5 ++++- fs/btrfs/tree-log.c | 21 +++++++++++++++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc22f556aa24..c0faf47d0cd9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4811,10 +4811,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) { drop_inode = 1; } else { + struct dentry *parent = dget_parent(dentry); btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + btrfs_log_new_name(trans, inode, NULL, parent); + dput(parent); } nr = trans->blocks_used; @@ -6768,8 +6770,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BUG_ON(ret); if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + struct dentry *parent = dget_parent(new_dentry); + btrfs_log_new_name(trans, old_inode, old_dir, parent); + dput(parent); btrfs_end_log_trans(root); } out_fail: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6b4bfa72bf8d..f1c9bb4079ed 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; struct btrfs_root *new_root; - struct inode *dir = dentry->d_parent->d_inode; + struct dentry *parent = dget_parent(dentry); + struct inode *dir; int ret; int err; u64 objectid; @@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 0, &objectid); - if (ret) + if (ret) { + dput(parent); return ret; + } + + dir = parent->d_inode; + /* * 1 - inode item * 2 - refs @@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root, * 2 - dir items */ trans = btrfs_start_transaction(root, 6); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { + dput(parent); return PTR_ERR(trans); + } leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); @@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: + dput(parent); if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); @@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid) { struct inode *inode; + struct dentry *parent; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; int ret; @@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, btrfs_orphan_cleanup(pending_snapshot->snap); - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); + parent = dget_parent(dentry); + inode = btrfs_lookup_dentry(parent->d_inode, dentry); + dput(parent); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1fffbc017bdf..f50e931fc217 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct inode *parent_inode; + struct dentry *parent; struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; @@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, trans->block_rsv = &pending->block_rsv; dentry = pending->dentry; - parent_inode = dentry->d_parent->d_inode; + parent = dget_parent(dentry); + parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root); @@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_inode->i_ino, index, dentry->d_name.name, dentry->d_name.len); BUG_ON(ret); + dput(parent); key.offset = (u64)-1; pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a29f19384a27..054744ac5719 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_root *root; + struct dentry *old_parent = NULL; /* * for regular files, if its inode is already on disk, we don't @@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; inode = parent->d_inode; } + dput(old_parent); out: return ret; } @@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; + struct dentry *old_parent = NULL; int ret = 0; u64 last_committed = root->fs_info->last_trans_committed; @@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; } ret = 0; end_trans: + dput(old_parent); if (ret < 0) { BUG_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; @@ -3039,8 +3047,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry) { - return btrfs_log_inode_parent(trans, root, dentry->d_inode, - dentry->d_parent, 0); + struct dentry *parent = dget_parent(dentry); + int ret; + + ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); + dput(parent); + + return ret; } /* From 495e86779f4f319828bc10dfc0c9ac2161868077 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Fri, 19 Nov 2010 20:36:10 +0000 Subject: [PATCH 055/179] Btrfs: hold i_mutex when calling btrfs_log_dentry_safe Since we walk up the path logging all of the parts of the inode's path, we need to hold i_mutex to make sure that the inode is not renamed while we're logging everything. btrfs_log_dentry_safe does dget_parent and all of that jazz, but we may get unexpected results if the rename changes the inode's location while we're higher up the path logging those dentries, so do this for safety reasons. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e354c33df082..c1faded5fca0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1047,8 +1047,14 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + num_written = PTR_ERR(trans); + goto done; + } + mutex_lock(&inode->i_mutex); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + mutex_unlock(&inode->i_mutex); if (ret == 0) { ret = btrfs_sync_log(trans, root); if (ret == 0) @@ -1067,6 +1073,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } } +done: current->backing_dev_info = NULL; return num_written ? num_written : err; } From a1b075d28da563c5e2325577f282c042494254ba Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Fri, 19 Nov 2010 20:36:11 +0000 Subject: [PATCH 056/179] Btrfs: make btrfs_add_nondir take parent inode as an argument Everybody who calls btrfs_add_nondir just passes in the dentry of the new file and then dereference dentry->d_parent->d_inode, but everybody who calls btrfs_add_nondir() are already passed the parent's inode. So instead of dereferencing dentry->d_parent, just make btrfs_add_nondir take the dir inode as an argument and pass that along so we don't have to worry about d_parent. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0faf47d0cd9..37cc1776a5d7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4623,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int backref, u64 index) + struct inode *dir, struct dentry *dentry, + struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, backref, index); + int err = btrfs_add_link(trans, dir, inode, + dentry->d_name.name, dentry->d_name.len, + backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -4669,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -4683,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4731,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, - objectid, BTRFS_I(dir)->block_group, mode, - &index); + dentry->d_name.len, dir->i_ino, objectid, + BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -4746,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4806,7 +4803,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode, 1, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); if (err) { drop_inode = 1; @@ -4856,8 +4853,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode, &index); if (IS_ERR(inode)) { @@ -4880,9 +4876,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, 0, index); + err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail; @@ -6922,8 +6917,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, &index); err = PTR_ERR(inode); @@ -6937,7 +6931,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { From 45f49bce99d008d6864a20324548f35936ba46fb Mon Sep 17 00:00:00 2001 From: Chris Mason <chris.mason@oracle.com> Date: Sun, 21 Nov 2010 22:27:44 -0500 Subject: [PATCH 057/179] Btrfs: avoid NULL pointer deref in try_release_extent_buffer If we fail to find a pointer in the radix tree, don't try to deref the NULL one we do have. Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/extent_io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f60aa3c35c23..143d3f541d64 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3837,8 +3837,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) spin_lock(&tree->buffer_lock); eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (!eb) - goto out; + if (!eb) { + spin_unlock(&tree->buffer_lock); + return ret; + } if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { ret = 0; From 8575d93386d6ce9a3d4961134018d4e6c6bed618 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov <segoon@openwall.com> Date: Sun, 21 Nov 2010 20:40:21 +0300 Subject: [PATCH 058/179] ASoC: s3c24xx: test wrong variable After clk_get() mclk is checked three times instead of mout_epll and sclk_spdif checks. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/s3c24xx/smdk_spdif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/s3c24xx/smdk_spdif.c b/sound/soc/s3c24xx/smdk_spdif.c index f31d22ad7c88..c8bd90488a87 100644 --- a/sound/soc/s3c24xx/smdk_spdif.c +++ b/sound/soc/s3c24xx/smdk_spdif.c @@ -38,7 +38,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev) } mout_epll = clk_get(NULL, "mout_epll"); - if (IS_ERR(fout_epll)) { + if (IS_ERR(mout_epll)) { printk(KERN_WARNING "%s: Cannot find mout_epll.\n", __func__); ret = -EINVAL; @@ -54,7 +54,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev) } sclk_spdif = clk_get(NULL, "sclk_spdif"); - if (IS_ERR(fout_epll)) { + if (IS_ERR(sclk_spdif)) { printk(KERN_WARNING "%s: Cannot find sclk_spdif.\n", __func__); ret = -EINVAL; From 13a2e06c5898d27aadabfdb9830169101b21432f Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Mon, 22 Nov 2010 08:20:54 +0800 Subject: [PATCH 059/179] ASoC: stac9766 - set reg_cache_default to stac9766_reg Looks like this is missing during multi-component conversion. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/stac9766.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c index 00d67cc8e206..061f9e5a497b 100644 --- a/sound/soc/codecs/stac9766.c +++ b/sound/soc/codecs/stac9766.c @@ -383,6 +383,7 @@ static struct snd_soc_codec_driver soc_codec_dev_stac9766 = { .reg_cache_size = sizeof(stac9766_reg), .reg_word_size = sizeof(u16), .reg_cache_step = 2, + .reg_cache_default = stac9766_reg, }; static __devinit int stac9766_probe(struct platform_device *pdev) From f570e1dd8469d39420f406a4f5442c270b1e759e Mon Sep 17 00:00:00 2001 From: Tracey Dent <tdent48227@gmail.com> Date: Sun, 7 Nov 2010 09:43:33 -0500 Subject: [PATCH 060/179] EDAC: Remove deprecated kbuild goal definitions Change EDAC's Makefile to use <modules>-y instead of <modules>-objs because -objs is deprecated and not mentioned in Documentation/kbuild/makefiles.txt. [bp: Fixup commit message] [bp: Fixup indentation] Signed-off-by: Tracey Dent <tdent48227@gmail.com> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> --- drivers/edac/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index b3781399b38a..ba2898b3639b 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -10,16 +10,16 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o obj-$(CONFIG_EDAC_MCE) += edac_mce.o -edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o -edac_core-objs += edac_module.o edac_device_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-y += edac_module.o edac_device_sysfs.o ifdef CONFIG_PCI -edac_core-objs += edac_pci.o edac_pci_sysfs.o +edac_core-y += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o -edac_mce_amd-objs := mce_amd.o +edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o From df4b2a30e0b9e553abfd63ee75dbbd66be80f01a Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 18 Nov 2010 22:05:43 -0500 Subject: [PATCH 061/179] EDAC, MCE: Fix edac_init_mce_inject error handling Otherwise, variable i will be -1 inside the latest iteration of the while loop. Signed-off-by: Axel Lin <axel.lin@gmail.com> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> --- drivers/edac/mce_amd_inj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 8d0688f36d4c..39faded3cadd 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -139,7 +139,7 @@ static int __init edac_init_mce_inject(void) return 0; err_sysfs_create: - while (i-- >= 0) + while (--i >= 0) sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); kobject_del(mce_kobj); From c1a3a4b90a5a47adcca0e587f5d7e9ea61329b26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 22 Nov 2010 14:01:55 -0200 Subject: [PATCH 062/179] perf record: Handle restrictive permissions in /proc/{kallsyms,modules} The 59365d1 commit, even being reverted by 33e0d57, showed a non robust behavior in 'perf record': it really should just warn the user that some functionality will not be available. The new behavior then becomes: [acme@felicio linux]$ ls -la /proc/{kallsyms,modules} -r-------- 1 root root 0 Nov 22 12:19 /proc/kallsyms -r-------- 1 root root 0 Nov 22 12:19 /proc/modules [acme@felicio linux]$ perf record ls -R > /dev/null Couldn't record kernel reference relocation symbol Symbol resolution may be skewed if relocation was used (e.g. kexec). Check /proc/kallsyms permission or run as root. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (~161 samples) ] [acme@felicio linux]$ perf report --stdio [kernel.kallsyms] with build id 77b05e00e64e4de1c9347d83879779b540d69f00 not found, continuing without symbols # Events: 98 cycles # # Overhead Command Shared Object Symbol # ........ ....... ............... .................... # 48.26% ls [kernel] [k] ffffffff8102b92b 22.49% ls libc-2.12.90.so [.] __strlen_sse2 8.35% ls libc-2.12.90.so [.] __GI___strcoll_l 8.17% ls ls [.] 11580 3.35% ls libc-2.12.90.so [.] _IO_new_file_xsputn 3.33% ls libc-2.12.90.so [.] _int_malloc 1.88% ls libc-2.12.90.so [.] _int_free 0.84% ls libc-2.12.90.so [.] malloc_consolidate 0.84% ls libc-2.12.90.so [.] __readdir64 0.83% ls ls [.] strlen@plt 0.83% ls libc-2.12.90.so [.] __GI_fwrite_unlocked 0.83% ls libc-2.12.90.so [.] __memcpy_sse2 # # (For a higher level overview, try: perf report --sort comm,dso) # [acme@felicio linux]$ It still has the build-ids for DSOs in the maps with hits: [acme@felicio linux]$ perf buildid-list 77b05e00e64e4de1c9347d83879779b540d69f00 [kernel.kallsyms] 09c4a431a4a8b648fcfc2c2bdda70f56050ddff1 /bin/ls af75ea9ad951d25e0f038901a11b3846dccb29a4 /lib64/libc-2.12.90.so [acme@felicio linux]$ That can be used in another machine to resolve kernel symbols. Cc: Eugene Teo <eugeneteo@kernel.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jesper Juhl <jj@chaosbits.net> Cc: Marcus Meissner <meissner@suse.de> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sarah Sharp <sarah.a.sharp@linux.intel.com> Cc: Stephane Eranian <eranian@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Tom Zanussi <tzanussi@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-record.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 93bd2ff001fb..e2c2de201eec 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -697,17 +697,18 @@ static int __cmd_record(int argc, const char **argv) if (err < 0) err = event__synthesize_kernel_mmap(process_synthesized_event, session, machine, "_stext"); - if (err < 0) { - pr_err("Couldn't record kernel reference relocation symbol.\n"); - return err; - } + if (err < 0) + pr_err("Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); err = event__synthesize_modules(process_synthesized_event, session, machine); - if (err < 0) { - pr_err("Couldn't record kernel reference relocation symbol.\n"); - return err; - } + if (err < 0) + pr_err("Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + if (perf_guest) perf_session__process_machines(session, event__synthesize_guest_os); From 5fc43978a79e8021c189660ab63249fd29c5fb32 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 11:13:31 -0500 Subject: [PATCH 063/179] SUNRPC: Fix an infinite loop in call_refresh/call_refreshresult If the rpcauth_refreshcred() call returns an error other than EACCES, ENOMEM or ETIMEDOUT, we currently end up looping forever between call_refresh and call_refreshresult. The correct thing to do here is to exit on all errors except EAGAIN and ETIMEDOUT, for which case we retry 3 times, then return EACCES. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/clnt.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9dab9573be41..92ce94f5146b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -989,20 +989,26 @@ call_refreshresult(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_allocate; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; + task->tk_action = call_refresh; switch (status) { - case -EACCES: - rpc_exit(task, -EACCES); - return; - case -ENOMEM: - rpc_exit(task, -ENOMEM); + case 0: + if (rpcauth_uptodatecred(task)) + task->tk_action = call_allocate; return; case -ETIMEDOUT: rpc_delay(task, 3*HZ); + case -EAGAIN: + status = -EACCES; + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; + dprintk("RPC: %5u %s: retry refresh creds\n", + task->tk_pid, __func__); + return; } - task->tk_action = call_refresh; + dprintk("RPC: %5u %s: refresh creds failed with error %d\n", + task->tk_pid, __func__, status); + rpc_exit(task, status); } /* From b47d19de2c714020ba8f5545a6e7d4968f37eb45 Mon Sep 17 00:00:00 2001 From: Arun Bharadwaj <arun@linux.vnet.ibm.com> Date: Thu, 18 Nov 2010 10:36:43 +0000 Subject: [PATCH 064/179] Pure nfs client performance using odirect. When an application opens a file with O_DIRECT flag, if the size of the data that is written is equal to wsize, the client sends a WRITE RPC with stable flag set to UNSTABLE followed by a single COMMIT RPC rather than sending a single WRITE RPC with the stable flag set to FILE_SYNC. This a bug. Patch to fix this. Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 84d3c8b90206..e6ace0d93c71 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, goto out; nfs_alloc_commit_data(dreq); - if (dreq->commit_data == NULL || count < wsize) + if (dreq->commit_data == NULL || count <= wsize) sync = NFS_FILE_SYNC; dreq->inode = inode; From 463a376eae1c92a66c912af539bfd4bbefa37673 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 12:22:20 -0500 Subject: [PATCH 065/179] NFS: Buffer overflow in ->decode_dirent() should not be fatal Overflowing the buffer in the readdir ->decode_dirent() should not lead to a fatal error, but rather to an attempt to reread the record in question. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 2563f765c9b4..ab5937734f0b 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -495,7 +495,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se out_overflow: print_overflow_msg(__func__, xdr); - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 748dc91a4a14..e79e4f5f5d53 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -656,7 +656,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s out_overflow: print_overflow_msg(__func__, xdr); out_overflow_exit: - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b7a204ff6fe1..a3b39cbc90f9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6221,7 +6221,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, out_overflow: print_overflow_msg(__func__, xdr); - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* From 5c346854d8ce6ca91931f8fc9177934257a667d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 12:43:45 -0500 Subject: [PATCH 066/179] NFS: Assume eof if the server returns no readdir records Some servers are known to be buggy w.r.t. this. Deal with them... Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 662df2a5fad5..2789cb3fc425 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -466,8 +466,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct xdr_stream stream; struct xdr_buf buf; __be32 *ptr = xdr_page; - int status; struct nfs_cache_array *array; + unsigned int count = 0; + int status; buf.head->iov_base = xdr_page; buf.head->iov_len = buflen; @@ -488,6 +489,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } + count++; + if (desc->plus == 1) nfs_prime_dcache(desc->file->f_path.dentry, entry); @@ -496,13 +499,14 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } while (!entry->eof); - if (status == -EBADCOOKIE && entry->eof) { + if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) { array = nfs_readdir_get_array(page); if (!IS_ERR(array)) { array->eof_index = array->size; status = 0; nfs_readdir_release_array(page); - } + } else + status = PTR_ERR(array); } return status; } From e7c58e974a0318fcca5368e7b3570e10e9ae9028 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 13:22:24 -0500 Subject: [PATCH 067/179] NFS: Fix a page leak in nfs_do_filldir() nfs_do_filldir() must always free desc->page when it is done, otherwise we end up leaking the page. Also remove unused variable 'dentry'. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2789cb3fc425..42e66e961d73 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -701,11 +701,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int res = 0; struct nfs_cache_array *array = NULL; unsigned int d_type = DT_UNKNOWN; - struct dentry *dentry = NULL; array = nfs_readdir_get_array(desc->page); - if (IS_ERR(array)) - return PTR_ERR(array); + if (IS_ERR(array)) { + res = PTR_ERR(array); + goto out; + } for (i = desc->cache_entry_index; i < array->size; i++) { d_type = DT_UNKNOWN; @@ -726,9 +727,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, desc->eof = 1; nfs_readdir_release_array(desc->page); +out: cache_page_release(desc); - if (dentry != NULL) - dput(dentry); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; From 7a8e1dc34f52fd2927dbf7e520d7cd8eadc51336 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 13:24:46 -0500 Subject: [PATCH 068/179] NFS: Fix a page leak in uncached_readdir() Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 42e66e961d73..353f47c31b1d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -763,13 +763,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } + desc->page_index = 0; + desc->page = page; + if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { status = -EIO; goto out_release; } - desc->page_index = 0; - desc->page = page; status = nfs_do_filldir(desc, dirent, filldir); out: From 85f8607e163f8d281fb407357279cb4ac6df12e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 13:24:49 -0500 Subject: [PATCH 069/179] NFS: Fix the error handling in "uncached_readdir()" Currently, uncached_readdir() is broken because if fails to handle the results from nfs_readdir_xdr_to_array() correctly. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 353f47c31b1d..2492bacf68a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -766,10 +766,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->page_index = 0; desc->page = page; - if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { - status = -EIO; + status = nfs_readdir_xdr_to_array(desc, page, inode); + if (status < 0) goto out_release; - } status = nfs_do_filldir(desc, dirent, filldir); From ece0b4233b6b915d1f63add2bd9f2733aec6317a Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 13:55:33 -0500 Subject: [PATCH 070/179] NFS: Don't ignore errors from nfs_do_filldir() We should ignore the errors from the filldir callback, and just interpret them as meaning we should exit, however we should definitely pass back ENOMEM errors. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2492bacf68a7..ddc2e439702b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -709,13 +709,15 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } for (i = desc->cache_entry_index; i < array->size; i++) { + struct nfs_cache_array_entry *ent; d_type = DT_UNKNOWN; - res = filldir(dirent, array->array[i].string.name, - array->array[i].string.len, file->f_pos, - nfs_compat_user_ino64(array->array[i].ino), d_type); - if (res < 0) + ent = &array->array[i]; + if (filldir(dirent, ent->string.name, ent->string.len, + file->f_pos, nfs_compat_user_ino64(ent->ino), d_type) < 0) { + desc->eof = 1; break; + } file->f_pos++; desc->cache_entry_index = i; if (i < (array->size-1)) @@ -820,14 +822,14 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { + res = 0; /* This means either end of directory */ if (*desc->dir_cookie && desc->eof == 0) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); - if (res >= 0) + if (res == 0) continue; } - res = 0; break; } if (res == -ETOOSMALL && desc->plus) { @@ -842,10 +844,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; res = nfs_do_filldir(desc, dirent, filldir); - if (res < 0) { - res = 0; + if (res < 0) break; - } } out: nfs_unblock_sillyrename(dentry); From 3020093f578fb6c9acc6914dfd887a1ebd1db659 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 15:18:22 -0500 Subject: [PATCH 071/179] NFS: Correct the array bound calculation in nfs_readdir_add_to_array It looks as if the array size calculation in MAX_READDIR_ARRAY does not take the alignment of struct nfs_cache_array_entry into account. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ddc2e439702b..ced7291cc5f8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -171,8 +171,6 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[0]; }; -#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) - typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); typedef struct { struct file *file; @@ -257,11 +255,14 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) if (IS_ERR(array)) return PTR_ERR(array); - ret = -ENOSPC; - if (array->size >= MAX_READDIR_ARRAY) - goto out; cache_entry = &array->array[array->size]; + + /* Check that this entry lies within the page bounds */ + ret = -ENOSPC; + if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) + goto out; + cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); From 0b26a0bf6ff398185546432420bb772bcfdf8d94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 20 Nov 2010 14:26:44 -0500 Subject: [PATCH 072/179] NFS: Ensure we return the dirent->d_type when it is known Store the dirent->d_type in the struct nfs_cache_array_entry so that we can use it in getdents() calls. This fixes a regression with the new readdir code. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 7 ++++--- fs/nfs/internal.h | 9 +++++++++ fs/nfs/nfs2xdr.c | 2 ++ fs/nfs/nfs3xdr.c | 2 ++ fs/nfs/nfs4xdr.c | 4 ++++ include/linux/nfs_xdr.h | 1 + 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ced7291cc5f8..8ea4a4180a87 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -162,6 +162,7 @@ struct nfs_cache_array_entry { u64 cookie; u64 ino; struct qstr string; + unsigned char d_type; }; struct nfs_cache_array { @@ -265,6 +266,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; + cache_entry->d_type = entry->d_type; ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); if (ret) goto out; @@ -701,7 +703,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int i = 0; int res = 0; struct nfs_cache_array *array = NULL; - unsigned int d_type = DT_UNKNOWN; array = nfs_readdir_get_array(desc->page); if (IS_ERR(array)) { @@ -711,11 +712,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; - d_type = DT_UNKNOWN; ent = &array->array[i]; if (filldir(dirent, ent->string.name, ent->string.len, - file->f_pos, nfs_compat_user_ino64(ent->ino), d_type) < 0) { + file->f_pos, nfs_compat_user_ino64(ent->ino), + ent->d_type) < 0) { desc->eof = 1; break; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index db08ff3ff454..e6356b750b77 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -361,6 +361,15 @@ unsigned int nfs_page_length(struct page *page) return 0; } +/* + * Convert a umode to a dirent->d_type + */ +static inline +unsigned char nfs_umode_to_dtype(umode_t mode) +{ + return (mode >> 12) & 15; +} + /* * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index ab5937734f0b..5914a1911c95 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se entry->prev_cookie = entry->cookie; entry->cookie = ntohl(*p++); + entry->d_type = DT_UNKNOWN; + p = xdr_inline_peek(xdr, 8); if (p != NULL) entry->eof = !p[0] && p[1]; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e79e4f5f5d53..f6cc60f06dac 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s entry->prev_cookie = entry->cookie; p = xdr_decode_hyper(p, &entry->cookie); + entry->d_type = DT_UNKNOWN; if (plus) { entry->fattr->valid = 0; p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); if (IS_ERR(p)) goto out_overflow_exit; + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); /* In fact, a post_op_fh3: */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a3b39cbc90f9..9f1826b012e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) entry->ino = entry->fattr->fileid; + entry->d_type = DT_UNKNOWN; + if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + if (verify_attr_len(xdr, p, len) < 0) goto out_overflow; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ba6cc8f223c9..80f07198a31a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -483,6 +483,7 @@ struct nfs_entry { int eof; struct nfs_fh * fh; struct nfs_fattr * fattr; + unsigned char d_type; }; /* From 15ec44611904be0dcc97b84c29fbf964e5e2b36f Mon Sep 17 00:00:00 2001 From: Philip Rakity <prakity@marvell.com> Date: Fri, 19 Nov 2010 16:48:39 -0500 Subject: [PATCH 073/179] mmc: sdhci: 8-bit bus width changes We now: * check for a v3 controller before setting 8-bit bus width * offer a callback for platform code to switch to 8-bit mode, which allows non-v3 controllers to support it * rely on mmc->caps |= MMC_CAP_8_BIT_DATA; in platform code to specify that the board designers have indeed brought out all the pins for 8-bit to the slot. We were previously relying only on whether the *controller* supported 8-bit, which doesn't tell us anything about the pin configuration in the board design. This fixes the MMC card regression reported by Maxim Levitsky here: http://thread.gmane.org/gmane.linux.kernel.mmc/4336 by no longer assuming that 8-bit works by default. Signed-off-by: Philip Rakity <prakity@marvell.com> Tested-by: Giuseppe Cavallaro <peppe.cavallaro@st.com> Signed-off-by: Chris Ball <cjb@laptop.org> --- arch/arm/plat-pxa/include/plat/sdhci.h | 3 ++ drivers/mmc/host/sdhci-pxa.c | 4 +++ drivers/mmc/host/sdhci.c | 44 +++++++++++++++++++------- drivers/mmc/host/sdhci.h | 5 ++- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h index e49c5b6fc4e2..1ab332e37d7d 100644 --- a/arch/arm/plat-pxa/include/plat/sdhci.h +++ b/arch/arm/plat-pxa/include/plat/sdhci.h @@ -17,6 +17,9 @@ /* Require clock free running */ #define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0) +/* Board design supports 8-bit data on SD/SDIO BUS */ +#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2) + /* * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI * @max_speed: the maximum speed supported diff --git a/drivers/mmc/host/sdhci-pxa.c b/drivers/mmc/host/sdhci-pxa.c index fc406ac5d193..5a61208cbc66 100644 --- a/drivers/mmc/host/sdhci-pxa.c +++ b/drivers/mmc/host/sdhci-pxa.c @@ -141,6 +141,10 @@ static int __devinit sdhci_pxa_probe(struct platform_device *pdev) if (pdata->quirks) host->quirks |= pdata->quirks; + /* If slot design supports 8 bit data, indicate this to MMC. */ + if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) + host->mmc->caps |= MMC_CAP_8_BIT_DATA; + ret = sdhci_add_host(host); if (ret) { dev_err(&pdev->dev, "failed to add host\n"); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 154cbf83c1ab..a25db426c910 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1185,18 +1185,32 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (host->ops->platform_send_init_74_clocks) host->ops->platform_send_init_74_clocks(host, ios->power_mode); + /* + * If your platform has 8-bit width support but is not a v3 controller, + * or if it requires special setup code, you should implement that in + * platform_8bit_width(). + */ + if (host->ops->platform_8bit_width) + host->ops->platform_8bit_width(host, ios->bus_width); + else { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + if (ios->bus_width == MMC_BUS_WIDTH_8) { + ctrl &= ~SDHCI_CTRL_4BITBUS; + if (host->version >= SDHCI_SPEC_300) + ctrl |= SDHCI_CTRL_8BITBUS; + } else { + if (host->version >= SDHCI_SPEC_300) + ctrl &= ~SDHCI_CTRL_8BITBUS; + if (ios->bus_width == MMC_BUS_WIDTH_4) + ctrl |= SDHCI_CTRL_4BITBUS; + else + ctrl &= ~SDHCI_CTRL_4BITBUS; + } + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + } + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); - if (ios->bus_width == MMC_BUS_WIDTH_8) - ctrl |= SDHCI_CTRL_8BITBUS; - else - ctrl &= ~SDHCI_CTRL_8BITBUS; - - if (ios->bus_width == MMC_BUS_WIDTH_4) - ctrl |= SDHCI_CTRL_4BITBUS; - else - ctrl &= ~SDHCI_CTRL_4BITBUS; - if ((ios->timing == MMC_TIMING_SD_HS || ios->timing == MMC_TIMING_MMC_HS) && !(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) @@ -1855,11 +1869,19 @@ int sdhci_add_host(struct sdhci_host *host) mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; + mmc->f_max = host->max_clk; mmc->caps |= MMC_CAP_SDIO_IRQ; + /* + * A controller may support 8-bit width, but the board itself + * might not have the pins brought out. Boards that support + * 8-bit width must set "mmc->caps |= MMC_CAP_8_BIT_DATA;" in + * their platform code before calling sdhci_add_host(), and we + * won't assume 8-bit width for hosts without that CAP. + */ if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) - mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA; + mmc->caps |= MMC_CAP_4_BIT_DATA; if (caps & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d52a7163b97a..e42d7f00c060 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -76,7 +76,7 @@ #define SDHCI_CTRL_ADMA1 0x08 #define SDHCI_CTRL_ADMA32 0x10 #define SDHCI_CTRL_ADMA64 0x18 -#define SDHCI_CTRL_8BITBUS 0x20 +#define SDHCI_CTRL_8BITBUS 0x20 #define SDHCI_POWER_CONTROL 0x29 #define SDHCI_POWER_ON 0x01 @@ -155,6 +155,7 @@ #define SDHCI_CLOCK_BASE_SHIFT 8 #define SDHCI_MAX_BLOCK_MASK 0x00030000 #define SDHCI_MAX_BLOCK_SHIFT 16 +#define SDHCI_CAN_DO_8BIT 0x00040000 #define SDHCI_CAN_DO_ADMA2 0x00080000 #define SDHCI_CAN_DO_ADMA1 0x00100000 #define SDHCI_CAN_DO_HISPD 0x00200000 @@ -215,6 +216,8 @@ struct sdhci_ops { unsigned int (*get_max_clock)(struct sdhci_host *host); unsigned int (*get_min_clock)(struct sdhci_host *host); unsigned int (*get_timeout_clock)(struct sdhci_host *host); + int (*platform_8bit_width)(struct sdhci_host *host, + int width); void (*platform_send_init_74_clocks)(struct sdhci_host *host, u8 power_mode); unsigned int (*get_ro)(struct sdhci_host *host); From d47844a014fada1a788719f6426bc7044f2a0fd8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau <nbd@openwrt.org> Date: Sat, 20 Nov 2010 03:08:47 +0100 Subject: [PATCH 074/179] ath9k: fix timeout on stopping rx dma It seems that using ath9k_hw_stoppcurecv to stop rx dma is not enough. When it's time to stop DMA, the PCU is still busy, so the rx enable bit never clears. Using ath9k_hw_abortpcurecv helps with getting rx stopped much faster, with this change, I cannot reproduce the rx stop related WARN_ON anymore. Signed-off-by: Felix Fietkau <nbd@openwrt.org> Cc: stable@kernel.org Signed-off-by: John W. Linville <linville@tuxdriver.com> --- drivers/net/wireless/ath/ath9k/recv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index c76ea53c20ce..1a62e351ec77 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -518,7 +518,7 @@ bool ath_stoprecv(struct ath_softc *sc) bool stopped; spin_lock_bh(&sc->rx.rxbuflock); - ath9k_hw_stoppcurecv(ah); + ath9k_hw_abortpcurecv(ah); ath9k_hw_setrxfilter(ah, 0); stopped = ath9k_hw_stopdmarecv(ah); From 1d8638d4038eb8709edc80e37a0bbb77253d86e9 Mon Sep 17 00:00:00 2001 From: Daniel Klaffenbach <danielklaffenbach@gmail.com> Date: Fri, 19 Nov 2010 21:25:21 -0600 Subject: [PATCH 075/179] ssb: b43-pci-bridge: Add new vendor for BCM4318 Add new vendor for Broadcom 4318. Signed-off-by: Daniel Klaffenbach <danielklaffenbach@gmail.com> Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net> Cc: Stable <stable@kernel.org> Signed-off-by: John W. Linville <linville@tuxdriver.com> --- drivers/ssb/b43_pci_bridge.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/ssb/b43_pci_bridge.c b/drivers/ssb/b43_pci_bridge.c index ef9c6a04ad8f..744d3f6e4709 100644 --- a/drivers/ssb/b43_pci_bridge.c +++ b/drivers/ssb/b43_pci_bridge.c @@ -24,6 +24,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) }, + { PCI_DEVICE(PCI_VENDOR_ID_BCM_GVC, 0x4318) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4321) }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d278dd9cb765..f29c25ede70d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,7 @@ #define PCI_DEVICE_ID_AFAVLAB_P030 0x2182 #define PCI_SUBDEVICE_ID_AFAVLAB_P061 0x2150 +#define PCI_VENDOR_ID_BCM_GVC 0x14a4 #define PCI_VENDOR_ID_BROADCOM 0x14e4 #define PCI_DEVICE_ID_TIGON3_5752 0x1600 #define PCI_DEVICE_ID_TIGON3_5752M 0x1601 From b397492a8c1022887a9b2fb925fe92e69ce0ad4d Mon Sep 17 00:00:00 2001 From: Christian Lamparter <chunkeey@googlemail.com> Date: Sat, 20 Nov 2010 13:15:27 +0100 Subject: [PATCH 076/179] carl9170: fix virtual interface setup crash This patch fixes a faulty bound check which caused a crash when too many virtual interface were brought up. BUG: unable to handle kernel NULL pointer dereference at 00000004 IP: [<f8125f67>] carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] *pde = 00000000 Oops: 0002 [#1] PREEMPT Modules linked in: carl9170 [...] Pid: 4720, comm: wpa_supplicant Not tainted 2.6.37-rc2-wl+ EIP: 0060:[<f8125f67>] EFLAGS: 00210206 CPU: 0 EIP is at carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] EAX: 00000000 ... Process wpa_supplicant Stack: f4f88f34 fffffff4 .. Call Trace: [<f8f4e666>] ? ieee80211_do_open+0x406/0x5c0 [mac80211] [...] Code: <89> 42 04 ... EIP: [<f8125f67>] carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] CR2: 0000000000000004 Signed-off-by: Christian Lamparter <chunkeey@googlemail.com> Signed-off-by: John W. Linville <linville@tuxdriver.com> --- drivers/net/wireless/ath/carl9170/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 980ae70ea424..a314c2c2bfbe 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -647,7 +647,7 @@ static int carl9170_op_add_interface(struct ieee80211_hw *hw, } unlock: - if (err && (vif_id != -1)) { + if (err && (vif_id >= 0)) { vif_priv->active = false; bitmap_release_region(&ar->vif_bitmap, vif_id, 0); ar->vifs--; From b3915d1fb6557dda206f4644ba9aa96ffd9a99d2 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov <segoon@openwall.com> Date: Mon, 22 Nov 2010 18:59:13 +0300 Subject: [PATCH 077/179] ASoC: atmel: test wrong variable After clk_get() mclk is checked second time instead of pllb check. In patch v1 Jarkko Nikula noticed that PTR_ERR() is also has wrong argument. Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/atmel/sam9g20_wm8731.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 293569dfd0ed..032e17dd8fdb 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -222,9 +222,9 @@ static int __init at91sam9g20ek_init(void) } pllb = clk_get(NULL, "pllb"); - if (IS_ERR(mclk)) { + if (IS_ERR(pllb)) { printk(KERN_ERR "ASoC: Failed to get PLLB\n"); - ret = PTR_ERR(mclk); + ret = PTR_ERR(pllb); goto err_mclk; } ret = clk_set_parent(mclk, pllb); From f71a4734b1ad7edccbfd9bd395df328ebbd94287 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@opensource.wolfsonmicro.com> Date: Mon, 22 Nov 2010 19:11:48 +0000 Subject: [PATCH 078/179] ASoC: Fix multi-component mismerge in WM8523 Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> --- sound/soc/codecs/wm8523.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c index 712ef7c76f90..9a433a5396cb 100644 --- a/sound/soc/codecs/wm8523.c +++ b/sound/soc/codecs/wm8523.c @@ -146,7 +146,6 @@ static int wm8523_startup(struct snd_pcm_substream *substream, return -EINVAL; } - return 0; snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &wm8523->rate_constraint); From eba19fdd818dfec3782ff095591e51c9bd617403 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@opensource.wolfsonmicro.com> Date: Fri, 19 Nov 2010 16:09:15 +0000 Subject: [PATCH 079/179] ASoC: Restore WM8994 volatile and readable register operations They went AWOL during the multi-component merge. Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> --- sound/soc/codecs/wm8994.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 830dfdd66c5f..ea3ee9fde2b1 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4073,6 +4073,8 @@ static struct snd_soc_codec_driver soc_codec_dev_wm8994 = { .resume = wm8994_resume, .read = wm8994_read, .write = wm8994_write, + .readable_register = wm8994_readable, + .volatile_register = wm8994_volatile, .set_bias_level = wm8994_set_bias_level, }; From 103cfcf522cefe00d8c322c6beac9a711acbf235 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <error27@gmail.com> Date: Tue, 23 Nov 2010 09:26:02 +0300 Subject: [PATCH 080/179] nilfs2: nilfs_iget_for_gc() returns ERR_PTR nilfs_iget_for_gc() returns an ERR_PTR() on failure and doesn't return NULL. Signed-off-by: Dan Carpenter <error27@gmail.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> --- fs/nilfs2/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3e90f86d5bfe..e00d9457c256 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -349,8 +349,8 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, ino = vdesc->vd_ino; cno = vdesc->vd_cno; inode = nilfs_iget_for_gc(sb, ino, cno); - if (unlikely(inode == NULL)) { - ret = -ENOMEM; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; } do { From 92a5288501685bf05fc348ee2a3115a9bd9ae36f Mon Sep 17 00:00:00 2001 From: Jesper Juhl <jj@chaosbits.net> Date: Mon, 22 Nov 2010 22:54:03 +0100 Subject: [PATCH 081/179] ASoC: MPC5200: Eliminate duplicate include of of_device.h Eliminate duplicate #include <linux/of_device.h> from sound/soc/fsl/mpc5200_dma.c Signed-off-by: Jesper Juhl <jj@chaosbits.net> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/fsl/mpc5200_dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index dce6b551cd78..f92dca07cd35 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/of_device.h> #include <linux/slab.h> -#include <linux/of_device.h> #include <linux/of_platform.h> #include <sound/soc.h> From 7a479b02843c8d78ef51a64d1168592258440c97 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Tue, 23 Nov 2010 14:14:07 +0800 Subject: [PATCH 082/179] ASoC: Do not update the cache if write to hardware failed Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/tpa6130a2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c index ee4fb201de60..d2c243095673 100644 --- a/sound/soc/codecs/tpa6130a2.c +++ b/sound/soc/codecs/tpa6130a2.c @@ -78,8 +78,10 @@ static int tpa6130a2_i2c_write(int reg, u8 value) if (data->power_state) { val = i2c_smbus_write_byte_data(tpa6130a2_client, reg, value); - if (val < 0) + if (val < 0) { dev_err(&tpa6130a2_client->dev, "Write failed\n"); + return val; + } } /* Either powered on or off, we save the context */ From bc5954f00e80c55140f546c80f34a8660bdd2c5f Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Tue, 23 Nov 2010 15:56:21 +0800 Subject: [PATCH 083/179] ASoC: max98088 - fix a memory leak Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/max98088.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 470cb93b1d1f..d63e28773eb1 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -2019,7 +2019,10 @@ static int max98088_probe(struct snd_soc_codec *codec) static int max98088_remove(struct snd_soc_codec *codec) { + struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec); + max98088_set_bias_level(codec, SND_SOC_BIAS_OFF); + kfree(max98088->eq_texts); return 0; } From cd70978cb59fd20dccdfc790ea8bb308c2dfd1d6 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Tue, 23 Nov 2010 15:57:49 +0800 Subject: [PATCH 084/179] ASoC: wm8904 - fix memory leaks Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/wm8904.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index 33be84e506ea..fca60a0b57b8 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -2498,6 +2498,8 @@ static int wm8904_remove(struct snd_soc_codec *codec) wm8904_set_bias_level(codec, SND_SOC_BIAS_OFF); regulator_bulk_free(ARRAY_SIZE(wm8904->supplies), wm8904->supplies); + kfree(wm8904->retune_mobile_texts); + kfree(wm8904->drc_texts); return 0; } From 24fb2b1174ddc1f844e2008eb5b3105832860395 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Tue, 23 Nov 2010 15:58:39 +0800 Subject: [PATCH 085/179] ASoC: wm8994 - fix memory leaks Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/wm8994.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index ea3ee9fde2b1..4d3e6f1ac584 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4061,6 +4061,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC2_DET, wm8994); wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_SHRT, wm8994); wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_DET, wm8994); + kfree(wm8994->retune_mobile_texts); + kfree(wm8994->drc_texts); kfree(wm8994); return 0; From 02a9d03772aa1ff33a26180a2da0bfb191240eda Mon Sep 17 00:00:00 2001 From: Rabin Vincent <rabin@rab.in> Date: Tue, 23 Nov 2010 22:08:18 +0530 Subject: [PATCH 086/179] perf symbols: Remove incorrect open-coded container_of() At least on ARM, padding is inserted between rb_node and sym in struct symbol_name_rb_node, causing "((void *)sym) - sizeof(struct rb_node)" to point inside rb_node rather than to the symbol_name_rb_node. Fix this by converting the code to use container_of(). Cc: Ian Munsie <imunsie@au1.ibm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ming Lei <tom.leiming@gmail.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tom Zanussi <tzanussi@gmail.com> LKML-Reference: <20101123163106.GA25677@debian> Signed-off-by: Rabin Vincent <rabin@rab.in> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b39f499e575a..0500895a45af 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym) { struct rb_node **p = &self->rb_node; struct rb_node *parent = NULL; - struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s; + struct symbol_name_rb_node *symn, *s; + + symn = container_of(sym, struct symbol_name_rb_node, sym); while (*p != NULL) { parent = *p; From e476a5a41ad67d0e2b4a652820c49a3923eb936b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski <g.liakhovetski@gmx.de> Date: Tue, 23 Nov 2010 17:10:24 +0100 Subject: [PATCH 087/179] wireless: b43: fix error path in SDIO Fix unbalanced call to sdio_release_host() on the error path. Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> Acked-by: Larry Finger <Larry.Finger@lwfinger.net> Cc: stable@kernel.org Signed-off-by: John W. Linville <linville@tuxdriver.com> --- drivers/net/wireless/b43/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c index 9a55338d957f..09e2dfd7b175 100644 --- a/drivers/net/wireless/b43/sdio.c +++ b/drivers/net/wireless/b43/sdio.c @@ -163,6 +163,7 @@ static int b43_sdio_probe(struct sdio_func *func, err_free_ssb: kfree(sdio); err_disable_func: + sdio_claim_host(func); sdio_disable_func(func); err_release_host: sdio_release_host(func); From f6c26ec5085be805c9dc72d074ef5f504b9cd7df Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Date: Wed, 24 Nov 2010 02:18:59 +0900 Subject: [PATCH 088/179] nilfs2: fix typo in comment of nilfs_dat_move function Fixes a typo: "uncommited" -> "uncommitted". Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> --- fs/nilfs2/dat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 49c844dab33a..59e5fe742f7b 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * the device at this point. * * To prevent nilfs_dat_translate() from returning the - * uncommited block number, this makes a copy of the entry + * uncommitted block number, this makes a copy of the entry * buffer and redirects nilfs_dat_translate() to the copy. */ if (!buffer_nilfs_redirected(entry_bh)) { From d94772070acc5a8f312ab4650cbbf5e78ea9dda2 Mon Sep 17 00:00:00 2001 From: Denis Kuplyakov <dener.kup@gmail.com> Date: Wed, 24 Nov 2010 06:01:09 +0100 Subject: [PATCH 089/179] ALSA: hda - Fix Acer 7730G support Fixes automatic EAPD configuration on Acer 7730G laptop. Signed-off-by: Denis Kuplyakov <dener.kup@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 49 +++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ac6aed0c889..8f7530fc7644 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2013,6 +2013,36 @@ static struct hda_verb alc888_acer_aspire_6530g_verbs[] = { { } }; +/* + *ALC888 Acer Aspire 7730G model + */ + +static struct hda_verb alc888_acer_aspire_7730G_verbs[] = { +/* Bias voltage on for external mic port */ + {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN | PIN_VREF80}, +/* Front Mic: set to PIN_IN (empty by default) */ + {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, +/* Unselect Front Mic by default in input mixer 3 */ + {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0xb)}, +/* Enable unsolicited event for HP jack */ + {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, +/* Enable speaker output */ + {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x14, AC_VERB_SET_EAPD_BTLENABLE, 2}, +/* Enable headphone output */ + {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | PIN_HP}, + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x15, AC_VERB_SET_CONNECT_SEL, 0x00}, + {0x15, AC_VERB_SET_EAPD_BTLENABLE, 2}, +/*Enable internal subwoofer */ + {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x17, AC_VERB_SET_CONNECT_SEL, 0x02}, + {0x17, AC_VERB_SET_EAPD_BTLENABLE, 2}, + { } +}; + /* * ALC889 Acer Aspire 8930G model */ @@ -2200,6 +2230,16 @@ static void alc888_acer_aspire_6530g_setup(struct hda_codec *codec) spec->autocfg.speaker_pins[2] = 0x17; } +static void alc888_acer_aspire_7730g_setup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + spec->autocfg.hp_pins[0] = 0x15; + spec->autocfg.speaker_pins[0] = 0x14; + spec->autocfg.speaker_pins[1] = 0x16; + spec->autocfg.speaker_pins[2] = 0x17; +} + static void alc889_acer_aspire_8930g_setup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -9524,13 +9564,6 @@ static struct hda_verb alc883_acer_eapd_verbs[] = { { } }; -static struct hda_verb alc888_acer_aspire_7730G_verbs[] = { - {0x15, AC_VERB_SET_CONNECT_SEL, 0x00}, - {0x17, AC_VERB_SET_CONNECT_SEL, 0x02}, - {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, - { } /* end */ -}; - static void alc888_6st_dell_setup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -10328,7 +10361,7 @@ static struct alc_config_preset alc882_presets[] = { .const_channel_count = 6, .input_mux = &alc883_capture_source, .unsol_event = alc_automute_amp_unsol_event, - .setup = alc888_acer_aspire_6530g_setup, + .setup = alc888_acer_aspire_7730g_setup, .init_hook = alc_automute_amp, }, [ALC883_MEDION] = { From e8ee13a818db4954517cea7da6e7c15b9656eb00 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 19 Nov 2010 07:22:58 +0000 Subject: [PATCH 090/179] ARM: mach-shmobile: clock-sh7372: remove fsidiv bogus disable Current FSIDIV clock framework had bogus disable. This patch remove it. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/clock-sh7372.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..13226323e4e0 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -453,10 +453,8 @@ static int fsidiv_enable(struct clk *clk) unsigned long value; value = __raw_readl(clk->mapping->base) >> 16; - if (value < 2) { - fsidiv_disable(clk); + if (value < 2) return -ENOENT; - } __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,11 +466,6 @@ static int fsidiv_set_rate(struct clk *clk, { int idx; - if (clk->parent->rate == rate) { - fsidiv_disable(clk); - return 0; - } - idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) return -ENOENT; From d4bc99b977e3a1dd10a84a01ebe59ac2ccebf0cd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Wed, 24 Nov 2010 02:44:06 +0000 Subject: [PATCH 091/179] ARM: mach-shmobile: ap4evb: FSI clock use proper process for HDMI Current AP4 FSI set_rate function used bogus clock process which didn't care enable/disable and clk->usecound. To solve this issue, this patch also modify FSI driver to call set_rate with enough options. This patch modify it. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/board-ap4evb.c | 60 ++++++++++++++++++++------- arch/arm/mach-shmobile/clock-sh7372.c | 2 +- include/sound/sh_fsi.h | 6 ++- sound/soc/sh/fsi.c | 19 ++++++++- 4 files changed, 68 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d3260542b943..61c1068198ec 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -567,40 +567,72 @@ static struct platform_device *qhd_devices[] __initdata = { /* FSI */ #define IRQ_FSI evt2irq(0x1840) +static int __fsi_set_rate(struct clk *clk, long rate, int enable) +{ + int ret = 0; -static int fsi_set_rate(int is_porta, int rate) + if (rate <= 0) + return ret; + + if (enable) { + ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + if (0 == ret) + ret = clk_enable(clk); + } else { + clk_disable(clk); + } + + return ret; +} + +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; + long fsib_rate = 0; + long fdiv_rate = 0; + int ackmd_bpfmd; int ret; /* set_rate is not needed if port A */ if (is_porta) return 0; - fsib_clk = clk_get(NULL, "fsib_clk"); - if (IS_ERR(fsib_clk)) - return -EINVAL; - switch (rate) { case 44100: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; case 48000: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); - clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = 85428000; /* around 48kHz x 256 x 7 */ + fdiv_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; default: pr_err("unsupported rate in FSI2 port B\n"); - ret = -EINVAL; - break; + return -EINVAL; } - clk_put(fsib_clk); + /* FSI B setting */ + fsib_clk = clk_get(dev, "ickb"); + if (IS_ERR(fsib_clk)) + return -EIO; - return ret; + ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); + clk_put(fsib_clk); + if (ret < 0) + return ret; + + /* FSI DIV setting */ + ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + if (ret < 0) { + /* disable FSI B */ + if (enable) + __fsi_set_rate(fsib_clk, fsib_rate, 0); + return ret; + } + + return ackmd_bpfmd; } static struct sh_fsi_platform_info fsi_info = { diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 13226323e4e0..4191e2921127 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -471,7 +471,7 @@ static int fsidiv_set_rate(struct clk *clk, return -ENOENT; __raw_writel(idx << 16, clk->mapping->base); - return fsidiv_enable(clk); + return 0; } static struct clk_ops fsidiv_clk_ops = { diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h index fa60cbda90a4..d79894192ae3 100644 --- a/include/sound/sh_fsi.h +++ b/include/sound/sh_fsi.h @@ -85,7 +85,9 @@ * ACK_MD (FSI2) * CKG1 (FSI) * - * err: return value < 0 + * err : return value < 0 + * no change : return value == 0 + * change xMD : return value > 0 * * 0x-00000AB * @@ -111,7 +113,7 @@ struct sh_fsi_platform_info { unsigned long porta_flags; unsigned long portb_flags; - int (*set_rate)(int is_porta, int rate); /* for master mode */ + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); }; #endif /* __SOUND_FSI_H */ diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 507e709f2807..136414f163e9 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -132,6 +132,8 @@ struct fsi_priv { struct fsi_stream playback; struct fsi_stream capture; + long rate; + u32 mst_ctrl; }; @@ -854,10 +856,17 @@ static void fsi_dai_shutdown(struct snd_pcm_substream *substream, { struct fsi_priv *fsi = fsi_get_priv(substream); int is_play = fsi_is_play(substream); + struct fsi_master *master = fsi_get_master(fsi); + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); fsi_irq_disable(fsi, is_play); fsi_clk_ctrl(fsi, 0); + set_rate = master->info->set_rate; + if (set_rate && fsi->rate) + set_rate(dai->dev, fsi_is_port_a(fsi), fsi->rate, 0); + fsi->rate = 0; + pm_runtime_put_sync(dai->dev); } @@ -891,9 +900,10 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, { struct fsi_priv *fsi = fsi_get_priv(substream); struct fsi_master *master = fsi_get_master(fsi); - int (*set_rate)(int is_porta, int rate) = master->info->set_rate; + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); int fsi_ver = master->core->ver; int is_play = fsi_is_play(substream); + long rate = params_rate(params); int ret; /* if slave mode, set_rate is not needed */ @@ -901,10 +911,15 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, return 0; /* it is error if no set_rate */ + set_rate = master->info->set_rate; if (!set_rate) return -EIO; - ret = set_rate(fsi_is_port_a(fsi), params_rate(params)); + ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1); + if (ret < 0) /* error */ + return ret; + + fsi->rate = rate; if (ret > 0) { u32 data = 0; From 22de4e1fe446794acaebdf19dcaff4256d659972 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 19 Nov 2010 07:23:17 +0000 Subject: [PATCH 092/179] ARM: mach-shmobile: ap4evb: FSI clock use proper process for ak4642 Current AP4 FSI didn't use set_rate for ak4642, and used dummy rate when init. And FSI driver was modified to always call set_rate. The user which are using FSI set_rate is only AP4 now. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/board-ap4evb.c | 96 +++++++++++++++++++-------- sound/soc/sh/fsi.c | 8 +-- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 61c1068198ec..e084b423146e 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -575,7 +575,7 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; if (enable) { - ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + ret = clk_set_rate(clk, rate); if (0 == ret) ret = clk_enable(clk); } else { @@ -585,7 +585,56 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; } -static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +static int __fsi_set_round_rate(struct clk *clk, long rate, int enable) +{ + return __fsi_set_rate(clk, clk_round_rate(clk, rate), enable); +} + +static int fsi_ak4642_set_rate(struct device *dev, int rate, int enable) +{ + struct clk *fsia_ick; + struct clk *fsiack; + int ret = -EIO; + + fsia_ick = clk_get(dev, "icka"); + if (IS_ERR(fsia_ick)) + return PTR_ERR(fsia_ick); + + /* + * FSIACK is connected to AK4642, + * and use external clock pin from it. + * it is parent of fsia_ick now. + */ + fsiack = clk_get_parent(fsia_ick); + if (!fsiack) + goto fsia_ick_out; + + /* + * we get 1/1 divided clock by setting same rate to fsiack and fsia_ick + * + ** FIXME ** + * Because the freq_table of external clk (fsiack) are all 0, + * the return value of clk_round_rate became 0. + * So, it use __fsi_set_rate here. + */ + ret = __fsi_set_rate(fsiack, rate, enable); + if (ret < 0) + goto fsiack_out; + + ret = __fsi_set_round_rate(fsia_ick, rate, enable); + if ((ret < 0) && enable) + __fsi_set_round_rate(fsiack, rate, 0); /* disable FSI ACK */ + +fsiack_out: + clk_put(fsiack); + +fsia_ick_out: + clk_put(fsia_ick); + + return 0; +} + +static int fsi_hdmi_set_rate(struct device *dev, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; @@ -594,10 +643,6 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) int ackmd_bpfmd; int ret; - /* set_rate is not needed if port A */ - if (is_porta) - return 0; - switch (rate) { case 44100: fsib_rate = rate * 256; @@ -618,23 +663,35 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) if (IS_ERR(fsib_clk)) return -EIO; - ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); + ret = __fsi_set_round_rate(fsib_clk, fsib_rate, enable); clk_put(fsib_clk); if (ret < 0) return ret; /* FSI DIV setting */ - ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + ret = __fsi_set_round_rate(fdiv_clk, fdiv_rate, enable); if (ret < 0) { /* disable FSI B */ if (enable) - __fsi_set_rate(fsib_clk, fsib_rate, 0); + __fsi_set_round_rate(fsib_clk, fsib_rate, 0); return ret; } return ackmd_bpfmd; } +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +{ + int ret; + + if (is_porta) + ret = fsi_ak4642_set_rate(dev, rate, enable); + else + ret = fsi_hdmi_set_rate(dev, rate, enable); + + return ret; +} + static struct sh_fsi_platform_info fsi_info = { .porta_flags = SH_FSI_BRS_INV | SH_FSI_OUT_SLAVE_MODE | @@ -928,23 +985,11 @@ static int __init hdmi_init_pm_clock(void) device_initcall(hdmi_init_pm_clock); -#define FSIACK_DUMMY_RATE 48000 static int __init fsi_init_pm_clock(void) { struct clk *fsia_ick; int ret; - /* - * FSIACK is connected to AK4642, - * and the rate is depend on playing sound rate. - * So, set dummy rate (= 48k) here - */ - ret = clk_set_rate(&sh7372_fsiack_clk, FSIACK_DUMMY_RATE); - if (ret < 0) { - pr_err("Cannot set FSIACK dummy rate: %d\n", ret); - return ret; - } - fsia_ick = clk_get(&fsi_device.dev, "icka"); if (IS_ERR(fsia_ick)) { ret = PTR_ERR(fsia_ick); @@ -953,16 +998,9 @@ static int __init fsi_init_pm_clock(void) } ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk); - if (ret < 0) { - pr_err("Cannot set FSI-A parent: %d\n", ret); - goto out; - } - - ret = clk_set_rate(fsia_ick, FSIACK_DUMMY_RATE); if (ret < 0) - pr_err("Cannot set FSI-A rate: %d\n", ret); + pr_err("Cannot set FSI-A parent: %d\n", ret); -out: clk_put(fsia_ick); return ret; diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 136414f163e9..4c2404b1b862 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -902,18 +902,12 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, struct fsi_master *master = fsi_get_master(fsi); int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); int fsi_ver = master->core->ver; - int is_play = fsi_is_play(substream); long rate = params_rate(params); int ret; - /* if slave mode, set_rate is not needed */ - if (!fsi_is_master_mode(fsi, is_play)) - return 0; - - /* it is error if no set_rate */ set_rate = master->info->set_rate; if (!set_rate) - return -EIO; + return 0; ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1); if (ret < 0) /* error */ From a57b1a9bdfb61e8d7dc7acc5e2d8bc04c549e668 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 19 Nov 2010 07:23:26 +0000 Subject: [PATCH 093/179] ARM: mach-shmobile: clock-sh7372: modify error code Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 4191e2921127..41663e0ca104 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -454,7 +454,7 @@ static int fsidiv_enable(struct clk *clk) value = __raw_readl(clk->mapping->base) >> 16; if (value < 2) - return -ENOENT; + return -EIO; __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,7 +468,7 @@ static int fsidiv_set_rate(struct clk *clk, idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) - return -ENOENT; + return -EINVAL; __raw_writel(idx << 16, clk->mapping->base); return 0; From b16a2892b9852839307894cc429b7a7b145138a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 19 Nov 2010 07:23:32 +0000 Subject: [PATCH 094/179] ARM: mach-shmobile: clock-sh7372: remove unnecessary fsi clocks Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/clock-sh7372.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 41663e0ca104..cbeca2849918 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -602,8 +602,6 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]), CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]), CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]), - CLKDEV_CON_ID("fsia_clk", &div6_reparent_clks[DIV6_FSIA]), - CLKDEV_CON_ID("fsib_clk", &div6_reparent_clks[DIV6_FSIB]), CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]), CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]), CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]), From 421b446abeec55bed1251fab80cb5c12be58b773 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 19 Nov 2010 07:23:52 +0000 Subject: [PATCH 095/179] ARM: mach-shmobile: clock-sh7372: remove bogus pllc2 clock toggling. The PLLC2 clock was utilizing the same sort of enable/disable without regard to usecount approach that the FSIDIV clock was when being used as a PLL pass-through. This forces the enable/disable through the clock framework, which now prevents the clock from being ripped out or modified underneath users that have an existing handle on it. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/arm/mach-shmobile/board-ap4evb.c | 5 +++++ arch/arm/mach-shmobile/clock-sh7372.c | 12 ++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index e084b423146e..d440e5f456ad 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -969,6 +969,11 @@ static int __init hdmi_init_pm_clock(void) goto out; } + ret = clk_enable(&sh7372_pllc2_clk); + if (ret < 0) { + pr_err("Cannot enable pllc2 clock\n"); + goto out; + } pr_debug("PLLC2 set frequency %lu\n", rate); ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk); diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index cbeca2849918..e18a1241a95e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -230,21 +230,13 @@ static int pllc2_set_rate(struct clk *clk, if (idx < 0) return idx; - if (rate == clk->parent->rate) { - pllc2_disable(clk); - return 0; - } + if (rate == clk->parent->rate) + return -EINVAL; value = __raw_readl(PLLC2CR) & ~(0x3f << 24); - if (value & 0x80000000) - pllc2_disable(clk); - __raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR); - if (value & 0x80000000) - return pllc2_enable(clk); - return 0; } From 08b1a38465cab8c2224a5202c7a3b5e5f5630894 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 10:20:33 +0800 Subject: [PATCH 096/179] ASoC: wm8961 - clear WM8961_DACSLOPE bit for normal mode DACSLOPE bit of Register 06h ADC and DAC Control 2: 0: Normal mode 1: Sloping stop-band mode Thus in the case of normal mode, we should clear DACSLOPE bit. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> Cc: stable@kernel.org --- sound/soc/codecs/wm8961.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index 4f326f604104..93fecf5f94c6 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -711,7 +711,7 @@ static int wm8961_hw_params(struct snd_pcm_substream *substream, if (fs <= 24000) reg |= WM8961_DACSLOPE; else - reg &= WM8961_DACSLOPE; + reg &= ~WM8961_DACSLOPE; snd_soc_write(codec, WM8961_ADC_DAC_CONTROL_2, reg); return 0; From 2f7dceeda4708f470fd927adb3861bd8ebbe2310 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 10:21:54 +0800 Subject: [PATCH 097/179] ASoC: wm8961 - clear WM8961_MCLKDIV bit for freq <= 16500000 MCLKDIV bit of Register 04h Clocking1: 0 : Divide by 1 1 : Divide by 2 Thus in the case of freq <= 16500000, we should clear MCLKDIV bit. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> Cc: stable@kernel.org --- sound/soc/codecs/wm8961.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index 93fecf5f94c6..8340485c9851 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -736,7 +736,7 @@ static int wm8961_set_sysclk(struct snd_soc_dai *dai, int clk_id, freq /= 2; } else { dev_dbg(codec->dev, "Using MCLK/1 for %dHz MCLK\n", freq); - reg &= WM8961_MCLKDIV; + reg &= ~WM8961_MCLKDIV; } snd_soc_write(codec, WM8961_CLOCKING1, reg); From 5c12d20145ce30f9f8b7415d36dace5fb4dcc4f0 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 15:20:48 +0800 Subject: [PATCH 098/179] ASoC: Return proper error for omap3pandora_soc_init Return PTR_ERR(omap3pandora_dac_reg) instead of 0 if regulator_get failed. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/omap/omap3pandora.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c index dbd9d96b5f92..4ee33ce2cb98 100644 --- a/sound/soc/omap/omap3pandora.c +++ b/sound/soc/omap/omap3pandora.c @@ -306,6 +306,7 @@ static int __init omap3pandora_soc_init(void) pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", dev_name(&omap3pandora_snd_device->dev), PTR_ERR(omap3pandora_dac_reg)); + ret = PTR_ERR(omap3pandora_dac_reg); goto fail3; } From d6f443ae4c1d54379ad5953d7bcb89a63387184d Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 16:44:23 +0800 Subject: [PATCH 099/179] ASoC: nuc900-ac97: fix a memory leak Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Acked-by: Wan ZongShun <mcuos.com@gmail.com> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/nuc900/nuc900-ac97.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index 293dc748797c..e00e39dd6576 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -384,7 +384,6 @@ static int __devinit nuc900_ac97_drvprobe(struct platform_device *pdev) static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev) { - snd_soc_unregister_dai(&pdev->dev); clk_put(nuc900_ac97_data->clk); @@ -392,6 +391,7 @@ static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev) release_mem_region(nuc900_ac97_data->res->start, resource_size(nuc900_ac97_data->res)); + kfree(nuc900_ac97_data); nuc900_ac97_data = NULL; return 0; From cc1c452e509aefc28f7ad2deed75bc69d4f915f7 Mon Sep 17 00:00:00 2001 From: David Henningsson <david.henningsson@canonical.com> Date: Wed, 24 Nov 2010 14:17:47 +0100 Subject: [PATCH 100/179] ALSA: HDA: Add an extra DAC for Realtek ALC887-VD The patch enables ALC887-VD to use the DAC at nid 0x26, which makes it possible to use this DAC for e g Headphone volume. Signed-off-by: David Henningsson <david.henningsson@canonical.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8f7530fc7644..b0e6b8b47fa9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -18997,6 +18997,8 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid) return 0x02; else if (nid >= 0x0c && nid <= 0x0e) return nid - 0x0c + 0x02; + else if (nid == 0x26) /* ALC887-VD has this DAC too */ + return 0x25; else return 0; } @@ -19005,7 +19007,7 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid) static hda_nid_t alc662_dac_to_mix(struct hda_codec *codec, hda_nid_t pin, hda_nid_t dac) { - hda_nid_t mix[4]; + hda_nid_t mix[5]; int i, num; num = snd_hda_get_connections(codec, pin, mix, ARRAY_SIZE(mix)); From 9915672d41273f5b77f1b3c29b391ffb7732b84b Mon Sep 17 00:00:00 2001 From: Eric Dumazet <eric.dumazet@gmail.com> Date: Wed, 24 Nov 2010 09:15:27 -0800 Subject: [PATCH 101/179] af_unix: limit unix_tot_inflight Vegard Nossum found a unix socket OOM was possible, posting an exploit program. My analysis is we can eat all LOWMEM memory before unix_gc() being called from unix_release_sock(). Moreover, the thread blocked in unix_gc() can consume huge amount of time to perform cleanup because of huge working set. One way to handle this is to have a sensible limit on unix_tot_inflight, tested from wait_for_unix_gc() and to force a call to unix_gc() if this limit is hit. This solves the OOM and also reduce overall latencies, and should not slowdown normal workloads. Reported-by: Vegard Nossum <vegard.nossum@gmail.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/unix/garbage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index c8df6fda0b1f..40df93d1cf35 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } From 4448008eb12f4b6bb9993584de8ec1d20b708d6f Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt@goodmis.org> Date: Wed, 24 Nov 2010 11:19:05 -0800 Subject: [PATCH 102/179] isdn: icn: Fix stack corruption bug. Running randconfig with ktest.pl I hit this bug: [ 16.101158] ICN-ISDN-driver Rev 1.65.6.8 mem=0x000d0000 [ 16.106376] icn: (line0) ICN-2B, port 0x320 added [ 16.111064] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c1642880 [ 16.111066] [ 16.121214] Pid: 1, comm: swapper Not tainted 2.6.37-rc2-test-00124-g6656b3f #8 [ 16.128499] Call Trace: [ 16.130942] [<c0f51662>] ? printk+0x1d/0x23 [ 16.135200] [<c0f5153f>] panic+0x5c/0x162 [ 16.139286] [<c0d62a9a>] ? icn_addcard+0x6d/0xbe [ 16.143975] [<c0445783>] print_tainted+0x0/0x8c [ 16.148582] [<c1642880>] ? icn_init+0xd8/0xdf [ 16.153012] [<c1642880>] icn_init+0xd8/0xdf [ 16.157271] [<c04012e5>] do_one_initcall+0x8c/0x143 [ 16.162222] [<c16427a8>] ? icn_init+0x0/0xdf [ 16.166566] [<c15f1a05>] kernel_init+0x13f/0x1da [ 16.171256] [<c15f18c6>] ? kernel_init+0x0/0x1da [ 16.175945] [<c0403bfe>] kernel_thread_helper+0x6/0x10 [ 16.181181] panic occurred, switching back to text console Looking into it I found that the stack was corrupted by the assignment of the Rev #. The variable rev is given 10 bytes, and in this output the characters that were copied was: " 1.65.6.8 $". Which was 11 characters plus the null ending character for a total of 12 bytes, thus corrupting the stack. This patch ups the variable size to 20 bytes as well as changes the strcpy to strncpy. I also added a check to make sure '$' is found. Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/isdn/icn/icn.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c index 2e847a90bad0..f2b5bab5e6a1 100644 --- a/drivers/isdn/icn/icn.c +++ b/drivers/isdn/icn/icn.c @@ -1627,7 +1627,7 @@ __setup("icn=", icn_setup); static int __init icn_init(void) { char *p; - char rev[10]; + char rev[20]; memset(&dev, 0, sizeof(icn_dev)); dev.memaddr = (membase & 0x0ffc000); @@ -1637,9 +1637,10 @@ static int __init icn_init(void) spin_lock_init(&dev.devlock); if ((p = strchr(revision, ':'))) { - strcpy(rev, p + 1); + strncpy(rev, p + 1, 20); p = strchr(rev, '$'); - *p = 0; + if (p) + *p = 0; } else strcpy(rev, " ??? "); printk(KERN_NOTICE "ICN-ISDN-driver Rev%smem=0x%08lx\n", rev, From c39508d6f118308355468314ff414644115a07f3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Wed, 24 Nov 2010 11:47:22 -0800 Subject: [PATCH 103/179] tcp: Make TCP_MAXSEG minimum more correct. Use TCP_MIN_MSS instead of constant 64. Reported-by: Min Zhang <mzhang@mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 081419969485..f15c36a706ec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 64 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } From fa0e846494792e722d817b9d3d625a4ef4896c96 Mon Sep 17 00:00:00 2001 From: Phil Blundell <philb@gnu.org> Date: Wed, 24 Nov 2010 11:49:19 -0800 Subject: [PATCH 104/179] econet: disallow NULL remote addr for sendmsg(), fixes CVE-2010-3849 Later parts of econet_sendmsg() rely on saddr != NULL, so return early with EINVAL if NULL was passed otherwise an oops may occur. Signed-off-by: Phil Blundell <philb@gnu.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/econet/af_econet.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index f8c1ae4b41f0..e366f1bef91f 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -297,23 +297,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL) { - struct econet_sock *eo = ec_sk(sk); - - addr.station = eo->station; - addr.net = eo->net; - port = eo->port; - cb = eo->cb; - } else { - if (msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; - } + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -351,7 +342,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb = (struct ec_cb *)&skb->cb; - /* BUG: saddr may be NULL */ eb->cookie = saddr->cookie; eb->sec = *saddr; eb->sent = ec_tx_done; From 16c41745c7b92a243d0874f534c1655196c64b74 Mon Sep 17 00:00:00 2001 From: Phil Blundell <philb@gnu.org> Date: Wed, 24 Nov 2010 11:49:53 -0800 Subject: [PATCH 105/179] econet: fix CVE-2010-3850 Add missing check for capable(CAP_NET_ADMIN) in SIOCSIFADDR operation. Signed-off-by: Phil Blundell <philb@gnu.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/econet/af_econet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index e366f1bef91f..d41ba8e56c10 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -661,6 +661,9 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) err = 0; switch (cmd) { case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + edev = dev->ec_ptr; if (edev == NULL) { /* Magic up a new one. */ From a27e13d370415add3487949c60810e36069a23a6 Mon Sep 17 00:00:00 2001 From: Phil Blundell <philb@gnu.org> Date: Wed, 24 Nov 2010 11:51:47 -0800 Subject: [PATCH 106/179] econet: fix CVE-2010-3848 Don't declare variable sized array of iovecs on the stack since this could cause stack overflow if msg->msgiovlen is large. Instead, coalesce the user-supplied data into a new buffer and use a single iovec for it. Signed-off-by: Phil Blundell <philb@gnu.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/econet/af_econet.c | 62 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index d41ba8e56c10..13992e1d2726 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/sock.h> #include <net/inet_common.h> #include <linux/stat.h> @@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, #endif #ifdef CONFIG_ECONET_AUNUDP struct msghdr udpmsg; - struct iovec iov[msg->msg_iovlen+1]; + struct iovec iov[2]; struct aunhdr ah; struct sockaddr_in udpdest; __kernel_size_t size; - int i; mm_segment_t oldfs; + char *userbuf; #endif /* @@ -319,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } } - if (len + 15 > dev->mtu) { - mutex_unlock(&econet_mutex); - return -EMSGSIZE; - } - if (dev->type == ARPHRD_ECONET) { /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; int res; + if (len + 15 > dev->mtu) { + mutex_unlock(&econet_mutex); + return -EMSGSIZE; + } + dev_hold(dev); skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), @@ -405,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, return -ENETDOWN; /* No socket - can't send */ } + if (len > 32768) { + err = -E2BIG; + goto error; + } + /* Make up a UDP datagram and hand it off to some higher intellect. */ memset(&udpdest, 0, sizeof(udpdest)); @@ -436,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); - /* - * XXX: that is b0rken. We can't mix userland and kernel pointers - * in iovec, since on a lot of platforms copy_from_user() will - * *not* work with the kernel and userland ones at the same time, - * regardless of what we do with set_fs(). And we are talking about - * econet-over-ethernet here, so "it's only ARM anyway" doesn't - * apply. Any suggestions on fixing that code? -- AV - */ iov[0].iov_base = (void *)&ah; iov[0].iov_len = size; - for (i = 0; i < msg->msg_iovlen; i++) { - void __user *base = msg->msg_iov[i].iov_base; - size_t iov_len = msg->msg_iov[i].iov_len; - /* Check it now since we switch to KERNEL_DS later. */ - if (!access_ok(VERIFY_READ, base, iov_len)) { - mutex_unlock(&econet_mutex); - return -EFAULT; - } - iov[i+1].iov_base = base; - iov[i+1].iov_len = iov_len; - size += iov_len; + + userbuf = vmalloc(len); + if (userbuf == NULL) { + err = -ENOMEM; + goto error; } + iov[1].iov_base = userbuf; + iov[1].iov_len = len; + err = memcpy_fromiovec(userbuf, msg->msg_iov, len); + if (err) + goto error_free_buf; + /* Get a skbuff (no data, just holds our cb information) */ if ((skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) { - mutex_unlock(&econet_mutex); - return err; - } + &err)) == NULL) + goto error_free_buf; eb = (struct ec_cb *)&skb->cb; @@ -481,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_name = (void *)&udpdest; udpmsg.msg_namelen = sizeof(udpdest); udpmsg.msg_iov = &iov[0]; - udpmsg.msg_iovlen = msg->msg_iovlen + 1; + udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; udpmsg.msg_flags=0; @@ -489,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); + +error_free_buf: + vfree(userbuf); #else err = -EPROTOTYPE; #endif + error: mutex_unlock(&econet_mutex); return err; From f910043ce00791c06afc3789278447c4e88670ea Mon Sep 17 00:00:00 2001 From: Kevin Hilman <khilman@deeprootsystems.com> Date: Wed, 24 Nov 2010 11:09:03 -0800 Subject: [PATCH 107/179] OMAP: UART: don't resume UARTs that are not enabled. Add additional check to omap_uart_resume_idle() so that only enabled (specifically, idle-enabled) UARTs are allowed to resume. This matches the existing check in prepare idle. Without this patch, the system will hang if a board is configured to register only some uarts instead of all of them and PM is enabled. Cc: Govindraj R. <govindraj.raja@ti.com> Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com> [tony@atomide.com: updated description] Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/mach-omap2/serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index becf0e38ef7e..bc934dbe3bf0 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -406,7 +406,7 @@ void omap_uart_resume_idle(int num) struct omap_uart_state *uart; list_for_each_entry(uart, &uart_list, node) { - if (num == uart->num) { + if (num == uart->num && uart->can_sleep) { omap_uart_enable_clocks(uart); /* Check for IO pad wakeup */ From 0d8e2d0dad98a693bad88aea6876ac8b94ad95c6 Mon Sep 17 00:00:00 2001 From: Paul Walmsley <paul@pwsan.com> Date: Wed, 24 Nov 2010 16:49:05 -0700 Subject: [PATCH 108/179] OMAP2+: PM/serial: hold console semaphore while OMAP UARTs are disabled The console semaphore must be held while the OMAP UART devices are disabled, lest a console write cause an ARM abort (and a kernel crash) when the underlying console device is inaccessible. These crashes only occur when the console is on one of the OMAP internal serial ports. While this problem has been latent in the PM idle loop for some time, the crash was not triggerable with an unmodified kernel until commit 6f251e9db1093c187addc309b5f2f7fe3efd2995 ("OMAP: UART: omap_device conversions, remove implicit 8520 assumptions"). After this patch, a console write often occurs after the console UART has been disabled in the idle loop, crashing the system. Several users have encountered this bug: http://www.mail-archive.com/linux-omap@vger.kernel.org/msg38396.html http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36602.html The same commit also introduced new code that disabled the UARTs during init, in omap_serial_init_port(). The kernel will also crash in this code when earlyconsole and extra debugging is enabled: http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36411.html The minimal fix for the -rc series is to hold the console semaphore while the OMAP UARTs are disabled. This is a somewhat overbroad fix, since the console may not be located on an OMAP UART, as is the case with the GPMC UART on Zoom3. While it is technically possible to determine which devices the console or earlyconsole is actually running on, it is not a trivial problem to solve, and the code to do so is not really appropriate for the -rc series. The right long-term fix is to ensure that no code outside of the OMAP serial driver can disable an OMAP UART. As I understand it, code to implement this is under development by TI. This patch is a collaboration between Paul Walmsley <paul@pwsan.com> and Tony Lindgren <tony@atomide.com>. Thanks to Ming Lei <tom.leiming@gmail.com> and Pramod <pramod.gurav@ti.com> for their feedback on earlier versions of this patch. Signed-off-by: Paul Walmsley <paul@pwsan.com> Signed-off-by: Tony Lindgren <tony@atomide.com> Acked-by: Kevin Hilman <khilman@deeprootsystems.com> Cc: Ming Lei <tom.leiming@gmail.com> Cc: Pramod <pramod.gurav@ti.com> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Jean Pihet <jean.pihet@newoldbits.com> Cc: Govindraj.R <govindraj.raja@ti.com> --- arch/arm/mach-omap2/pm24xx.c | 7 +++++++ arch/arm/mach-omap2/pm34xx.c | 10 ++++++++++ arch/arm/mach-omap2/serial.c | 5 +++++ 3 files changed, 22 insertions(+) diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index a40457d81927..c85923e56b85 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -30,6 +30,7 @@ #include <linux/irq.h> #include <linux/time.h> #include <linux/gpio.h> +#include <linux/console.h> #include <asm/mach/time.h> #include <asm/mach/irq.h> @@ -118,6 +119,10 @@ static void omap2_enter_full_retention(void) if (omap_irq_pending()) goto no_sleep; + /* Block console output in case it is on one of the OMAP UARTs */ + if (try_acquire_console_sem()) + goto no_sleep; + omap_uart_prepare_idle(0); omap_uart_prepare_idle(1); omap_uart_prepare_idle(2); @@ -131,6 +136,8 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(1); omap_uart_resume_idle(0); + release_console_sem(); + no_sleep: if (omap2_pm_debug) { unsigned long long tmp; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 75c0cd13ad8e..0ec8a04b7473 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -28,6 +28,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/console.h> #include <plat/sram.h> #include <plat/clockdomain.h> @@ -385,6 +386,12 @@ void omap_sram_idle(void) omap3_enable_io_chain(); } + /* Block console output in case it is on one of the OMAP UARTs */ + if (per_next_state < PWRDM_POWER_ON || + core_next_state < PWRDM_POWER_ON) + if (try_acquire_console_sem()) + goto console_still_active; + /* PER */ if (per_next_state < PWRDM_POWER_ON) { omap_uart_prepare_idle(2); @@ -463,6 +470,9 @@ void omap_sram_idle(void) omap_uart_resume_idle(3); } + release_console_sem(); + +console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ if (omap3_has_io_wakeup() && (per_next_state < PWRDM_POWER_ON || diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index bc934dbe3bf0..d17960a1be25 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/serial_8250.h> #include <linux/pm_runtime.h> +#include <linux/console.h> #ifdef CONFIG_SERIAL_OMAP #include <plat/omap-serial.h> @@ -807,6 +808,8 @@ void __init omap_serial_init_port(int port) oh->dev_attr = uart; + acquire_console_sem(); /* in case the earlycon is on the UART */ + /* * Because of early UART probing, UART did not get idled * on init. Now that omap_device is ready, ensure full idle @@ -831,6 +834,8 @@ void __init omap_serial_init_port(int port) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; + release_console_sem(); + if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { device_init_wakeup(&od->pdev.dev, true); From 6fc50eafc49262e376455a480f9d793817fe74e2 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick <anarsoul@gmail.com> Date: Tue, 16 Nov 2010 18:11:59 +0900 Subject: [PATCH 109/179] ARM: S3C2410: Adapt h1940-bluetooth to gpiolib changes Replace in s3c_gpio_cfgpull with s3c_gpio_setpull. Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/mach-s3c2410/h1940-bluetooth.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c index 8aa2f1902a94..6b86a722a7db 100644 --- a/arch/arm/mach-s3c2410/h1940-bluetooth.c +++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c @@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev) /* Configures BT serial port GPIOs */ s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0); - s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT); - s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0); - s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0); - s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH, From cce58ab380727169ef72b76481441f851e5850b0 Mon Sep 17 00:00:00 2001 From: Kukjin Kim <kgene.kim@samsung.com> Date: Mon, 15 Nov 2010 11:11:22 +0900 Subject: [PATCH 110/179] ARM: S3C24XX: Fix wrong s3c_gpio_cfgpull This patch fixes wrong s3c_gpio_cfgpull with s3c_gpio_setpull. Cc: Ben Dooks <ben-linux@fluff.org> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c | 6 +++--- arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c | 6 +++--- arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c index 9793544a6ace..704175b0573f 100644 --- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c +++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c @@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE); } } diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c index db9e9e477ec1..72457afd6255 100644 --- a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c +++ b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c @@ -31,8 +31,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpd8_9_10(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE); } } diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c index 8ea663a438bb..c3972b645d13 100644 --- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c +++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c @@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE); } } From 8b8c87dee47ae7e41af95d03ca56b3a4633466a6 Mon Sep 17 00:00:00 2001 From: Darius Augulis <augulis.darius@gmail.com> Date: Tue, 16 Nov 2010 18:08:50 +0900 Subject: [PATCH 111/179] ARM: S3C64XX: fix uart clock setup for mini6410/real6410 Don't rewrite clock config in UCON preconfigured by bootloader. No need to set 10th bit in UCON because [11:10] 2'b00 means source clock is PCLK too. If set, console does not work if bootloader has preconfigured [11:10] with 2'b00. If not set, console works with any bootloader config value (2'bxx). More information about clock setup in UCON is available in "S3C6410X RISC Microprocessor User's Manual, Revision 1.20" p. 31-13 (Chapter 31.6.2 UART CONTROL REGISTER). Signed-off-by: Darius Augulis <augulis.darius@gmail.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/mach-s3c64xx/mach-mini6410.c | 2 +- arch/arm/mach-s3c64xx/mach-real6410.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c index 249c62956471..89f35e02e883 100644 --- a/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -45,7 +45,7 @@ #include <video/platform_lcd.h> -#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) +#define UCON S3C2410_UCON_DEFAULT #define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) #define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index f9ef9b5c5f5a..4957ab0a0d4a 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -46,7 +46,7 @@ #include <video/platform_lcd.h> -#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) +#define UCON S3C2410_UCON_DEFAULT #define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) #define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) From 18ad782c7fdbbb6b9db9e8d912fee1d783fe79e0 Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan <a.kesavan@samsung.com> Date: Thu, 21 Oct 2010 06:45:48 +0530 Subject: [PATCH 112/179] ARM: S3C24XX: Fix Demux error in UART3 irqs on S3C2443 and S3C2416 IRQ_S3C2443_UART3 is being used as the base when it should actually be IRQ_S3C2443_RX3 on S3C2443 and S3C2416 for the UART3. Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com> Signed-off-by: Sangbeom Kim <sbkim73@samsung.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/mach-s3c2416/irq.c | 2 +- arch/arm/mach-s3c2443/irq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c index 084d121f368c..784485777896 100644 --- a/arch/arm/mach-s3c2416/irq.c +++ b/arch/arm/mach-s3c2416/irq.c @@ -168,7 +168,7 @@ static struct irq_chip s3c2416_irq_dma = { static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) { - s3c2416_irq_demux(IRQ_S3C2443_UART3, 3); + s3c2416_irq_demux(IRQ_S3C2443_RX3, 3); } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) diff --git a/arch/arm/mach-s3c2443/irq.c b/arch/arm/mach-s3c2443/irq.c index 0e0d693f3974..df2b8db49818 100644 --- a/arch/arm/mach-s3c2443/irq.c +++ b/arch/arm/mach-s3c2443/irq.c @@ -166,7 +166,7 @@ static struct irq_chip s3c2443_irq_dma = { static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) { - s3c2443_irq_demux(IRQ_S3C2443_UART3, 3); + s3c2443_irq_demux(IRQ_S3C2443_RX3, 3); } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) From 35bbcfe6dd283ab36b46ce7c0b79986acad7b20b Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan <a.kesavan@samsung.com> Date: Thu, 21 Oct 2010 06:45:48 +0530 Subject: [PATCH 113/179] ARM: S3C24XX: Fix UART3 submask on S3C2416 and S3C2443 The UART3 submask should be 0x7 (SUBSRCPND[26:24]). Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com> Signed-off-by: Sangbeom Kim <sbkim73@samsung.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/mach-s3c2416/irq.c | 3 +-- arch/arm/mach-s3c2443/irq.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c index 784485777896..00174daf1526 100644 --- a/arch/arm/mach-s3c2416/irq.c +++ b/arch/arm/mach-s3c2416/irq.c @@ -172,8 +172,7 @@ static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) -#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) - +#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) static void s3c2416_irq_uart3_mask(unsigned int irqno) { diff --git a/arch/arm/mach-s3c2443/irq.c b/arch/arm/mach-s3c2443/irq.c index df2b8db49818..893424767ce1 100644 --- a/arch/arm/mach-s3c2443/irq.c +++ b/arch/arm/mach-s3c2443/irq.c @@ -170,8 +170,7 @@ static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) -#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) - +#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) static void s3c2443_irq_uart3_mask(unsigned int irqno) { From 9adf262a9f81489635f7868c9819cb864192811b Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan <a.kesavan@samsung.com> Date: Fri, 8 Oct 2010 09:07:19 +0530 Subject: [PATCH 114/179] ARM: S5PV210: Fix sysdev related warning messages This patch fixes following warning messages when CONFIG_PM selected. In file included from arch/arm/mach-s5pv210/mach-smdkv210.c:34: arch/arm/plat-samsung/include/plat/pm.h:104: warning: 'struct sys_device' declared inside parameter list arch/arm/plat-samsung/include/plat/pm.h:104: warning: its scope is only this definition or declaration, which is probably not what you want arch/arm/plat-samsung/include/plat/pm.h:105: warning: 'struct sys_device' declared inside parameter list In file included from arch/arm/mach-s5pv210/mach-smdkc110.c:31: arch/arm/plat-samsung/include/plat/pm.h:104: warning: 'struct sys_device' declared inside parameter list arch/arm/plat-samsung/include/plat/pm.h:104: warning: its scope is only this definition or declaration, which is probably not what you want arch/arm/plat-samsung/include/plat/pm.h:105: warning: 'struct sys_device' declared inside parameter list Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com> Signed-off-by: Sangbeom Kim <sbkim73@samsung.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com> --- arch/arm/mach-s5pv210/mach-smdkc110.c | 1 + arch/arm/mach-s5pv210/mach-smdkv210.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/mach-s5pv210/mach-smdkc110.c b/arch/arm/mach-s5pv210/mach-smdkc110.c index 0ad7924fe62e..5dd1681c069e 100644 --- a/arch/arm/mach-s5pv210/mach-smdkc110.c +++ b/arch/arm/mach-s5pv210/mach-smdkc110.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/serial_core.h> #include <linux/i2c.h> +#include <linux/sysdev.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c index bcd7a5d53401..1fbc45b2a432 100644 --- a/arch/arm/mach-s5pv210/mach-smdkv210.c +++ b/arch/arm/mach-s5pv210/mach-smdkv210.c @@ -13,6 +13,7 @@ #include <linux/i2c.h> #include <linux/init.h> #include <linux/serial_core.h> +#include <linux/sysdev.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> From 7167594a3da7dcc33203b85d62e519594baee390 Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski <herton@mandriva.com.br> Date: Thu, 25 Nov 2010 00:08:01 -0200 Subject: [PATCH 115/179] ALSA: hda - Fix ALC660-VD/ALC861-VD capture/playback mixers The mixer nids passed to alc_auto_create_input_ctls are wrong: 0x15 is a pin, and 0x09 is the ADC on both ALC660-VD/ALC861-VD. Thus with current code, input playback volume/switches and input source mixer controls are not created, and recording doesn't work. Select correct mixers, 0x0b (input playback mixer) and 0x22 (capture source mixer). Reference: https://qa.mandriva.com/show_bug.cgi?id=61159 Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br> Cc: <stable@kernel.org> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b0e6b8b47fa9..81a2a49b862c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -16943,7 +16943,7 @@ static struct alc_config_preset alc861vd_presets[] = { static int alc861vd_auto_create_input_ctls(struct hda_codec *codec, const struct auto_pin_cfg *cfg) { - return alc_auto_create_input_ctls(codec, cfg, 0x15, 0x09, 0); + return alc_auto_create_input_ctls(codec, cfg, 0x0b, 0x22, 0); } From e8129c642155616d9e2160a75f103e127c8c3708 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <heiko.carstens@de.ibm.com> Date: Thu, 25 Nov 2010 09:52:45 +0100 Subject: [PATCH 116/179] [S390] nmi: fix clock comparator revalidation On each machine check all registers are revalidated. The save area for the clock comparator however only contains the upper most seven bytes of the former contents, if valid. Therefore the machine check handler uses a store clock instruction to get the current time and writes that to the clock comparator register which in turn will generate an immediate timer interrupt. However within the lowcore the expected time of the next timer interrupt is stored. If the interrupt happens before that time the handler won't be called. In turn the clock comparator won't be reprogrammed and therefore the interrupt condition stays pending which causes an interrupt loop until the expected time is reached. On NOHZ machines this can result in unresponsive machines since the time of the next expected interrupted can be a couple of days in the future. To fix this just revalidate the clock comparator register with the expected value. In addition the special handling for udelay must be changed as well. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/kernel/nmi.c | 10 ++++------ arch/s390/lib/delay.c | 14 +++++++++----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index ac151399ef34..1995c1712fc8 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -95,7 +95,6 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck); static int notrace s390_revalidate_registers(struct mci *mci) { int kill_task; - u64 tmpclock; u64 zero; void *fpt_save_area, *fpt_creg_save_area; @@ -214,11 +213,10 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - asm volatile( - " stck 0(%1)\n" - " sckc 0(%1)" - : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory"); - + if (S390_lowcore.clock_comparator == -1) + set_clock_comparator(S390_lowcore.mcck_clock); + else + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 752b362bf651..7c37ec359ec2 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -29,17 +29,21 @@ static void __udelay_disabled(unsigned long long usecs) { unsigned long mask, cr0, cr0_saved; u64 clock_saved; + u64 end; + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); - set_clock_comparator(get_clock() + (usecs << 12)); __ctl_store(cr0_saved, 0, 0); cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; __ctl_load(cr0 , 0, 0); - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; lockdep_off(); - trace_hardirqs_on(); - __load_psw_mask(mask); - local_irq_disable(); + do { + set_clock_comparator(end); + trace_hardirqs_on(); + __load_psw_mask(mask); + local_irq_disable(); + } while (get_clock() < end); lockdep_on(); __ctl_load(cr0_saved, 0, 0); local_tick_enable(clock_saved); From 4814a2b3c603438ed8c330c74b49aa662b1ede43 Mon Sep 17 00:00:00 2001 From: Jan Glauber <jang@linux.vnet.ibm.com> Date: Thu, 25 Nov 2010 09:52:46 +0100 Subject: [PATCH 117/179] [S390] qdio: free indicator after reset is finished The qdio device indicator is freed before the device is notified that the indicator is reset. This sequence contains a race when the freed indicator is used by a new device while the reset of the indicator is still pending. Do the reset operation before freeing the indicator to avoid that potential race. Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_thinint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 752dbee06af5..5d9c66627b6e 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -292,8 +292,8 @@ void qdio_shutdown_thinint(struct qdio_irq *irq_ptr) return; /* reset adapter interrupt indicators */ - put_indicator(irq_ptr->dsci); set_subchannel_ind(irq_ptr, 1); + put_indicator(irq_ptr->dsci); } void __exit tiqdio_unregister_thinints(void) From 11cd1a8b8cad1acfc140d9acce93762a9c140b20 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" <mst@redhat.com> Date: Sun, 14 Nov 2010 17:31:52 +0200 Subject: [PATCH 118/179] vhost/net: fix rcu check usage Incorrect rcu check was used as rcu isn't done under mutex here. Force check to 1 for now, to stop it from complaining. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- drivers/vhost/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4b4da5b86ff9..f442668a1e52 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -129,8 +129,9 @@ static void handle_tx(struct vhost_net *net) size_t hdr_size; struct socket *sock; - sock = rcu_dereference_check(vq->private_data, - lockdep_is_held(&vq->mutex)); + /* TODO: check that we are running from vhost_worker? + * Not sure it's worth it, it's straight-forward enough. */ + sock = rcu_dereference_check(vq->private_data, 1); if (!sock) return; From 846172dfe33c7ee07638e04f94dd90e21dfdc5ba Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow <mad_soft@inbox.ru> Date: Thu, 25 Nov 2010 00:46:15 +0300 Subject: [PATCH 119/179] ASoC: fix SND_PXA2XX_LIB Kconfig warning Fix following warning observed when SND_PXA2XX_SOC is set and SND_ARM isn't: warning: (SND_PXA2XX_AC97 && SOUND && !M68K && SND && SND_ARM && ARCH_PXA || SND_PXA2XX_SOC && SOUND && !M68K && SND && SND_SOC && ARCH_PXA) selects SND_PXA2XX_LIB which has unmet direct dependencies (SOUND && !M68K && SND && SND_ARM) Signed-off-by: Dmitry Artamonow <mad_soft@inbox.ru> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/pxa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 37f191bbfdd9..580f48571303 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,6 +1,7 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA + select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to From 3b6bc354cb22b1069f88acdc7673d3476fbadfca Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 17:23:55 +0800 Subject: [PATCH 120/179] ASoC: Call snd_soc_unregister_dais instead of snd_soc_unregister_dai in sh4_soc_dai_remove We call snd_soc_register_dais() in sh4_soc_dai_probe(), thus we should call snd_soc_unregister_dais() in sh4_soc_dai_remove(). Otherwise, we got "too many arguments to function 'snd_soc_unregister_dai'" error message. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/sh/ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c index 40bbdf1591dc..05192d97b377 100644 --- a/sound/soc/sh/ssi.c +++ b/sound/soc/sh/ssi.c @@ -387,7 +387,7 @@ static int __devinit sh4_soc_dai_probe(struct platform_device *pdev) static int __devexit sh4_soc_dai_remove(struct platform_device *pdev) { - snd_soc_unregister_dai(&pdev->dev, ARRAY_SIZE(sh4_ssi_dai)); + snd_soc_unregister_dais(&pdev->dev, ARRAY_SIZE(sh4_ssi_dai)); return 0; } From 4e1f86509732ccc39938974db0612d14afbca953 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:07:25 +0800 Subject: [PATCH 121/179] ASoC: efika-audio-fabric: fix resource leak in efika_fabric_init error path Add missing platform_device_put() if platform_device_add() failed. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/fsl/efika-audio-fabric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c index 53251e6b5bd5..108b5d8bd0e9 100644 --- a/sound/soc/fsl/efika-audio-fabric.c +++ b/sound/soc/fsl/efika-audio-fabric.c @@ -76,6 +76,7 @@ static __init int efika_fabric_init(void) rc = platform_device_add(pdev); if (rc) { pr_err("efika_fabric_init: platform_device_add() failed\n"); + platform_device_put(pdev); return -ENODEV; } return 0; From 917dac0ff1754776b86967b0ec1750022d9c4265 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:08:31 +0800 Subject: [PATCH 122/179] ASoC: pcm030-audio-fabric: fix resource leak in pcm030_fabric_init error path Add missing platform_device_put() if platform_device_add() failed. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/fsl/pcm030-audio-fabric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c index 25f27ec1dd6e..ba4d85e317ed 100644 --- a/sound/soc/fsl/pcm030-audio-fabric.c +++ b/sound/soc/fsl/pcm030-audio-fabric.c @@ -76,6 +76,7 @@ static __init int pcm030_fabric_init(void) rc = platform_device_add(pdev); if (rc) { pr_err("pcm030_fabric_init: platform_device_add() failed\n"); + platform_device_put(pdev); return -ENODEV; } return 0; From b193deead8637291138a8c1c49753ee686fa5b17 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 10:44:59 +0800 Subject: [PATCH 123/179] ASoC: snd-soc-afeb9260: remove unneeded platform_device_del in error path Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/atmel/snd-soc-afeb9260.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/atmel/snd-soc-afeb9260.c b/sound/soc/atmel/snd-soc-afeb9260.c index e3d283561c19..86e0f8586dc3 100644 --- a/sound/soc/atmel/snd-soc-afeb9260.c +++ b/sound/soc/atmel/snd-soc-afeb9260.c @@ -167,7 +167,6 @@ static int __init afeb9260_soc_init(void) return 0; err1: - platform_device_del(afeb9260_snd_device); platform_device_put(afeb9260_snd_device); return err; } From c7a734e58ed237ecac2608a70eb31ba64e21c768 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:11:03 +0800 Subject: [PATCH 124/179] ASoC: sam9g20_wm8731: fix resource leak in at91sam9g20ek_init error path Fix the error path to properly free allocated resources. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/atmel/sam9g20_wm8731.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 032e17dd8fdb..e521ada80542 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -240,6 +240,7 @@ static int __init at91sam9g20ek_init(void) if (!at91sam9g20ek_snd_device) { printk(KERN_ERR "ASoC: Platform device allocation failed\n"); ret = -ENOMEM; + goto err_mclk; } platform_set_drvdata(at91sam9g20ek_snd_device, @@ -248,11 +249,13 @@ static int __init at91sam9g20ek_init(void) ret = platform_device_add(at91sam9g20ek_snd_device); if (ret) { printk(KERN_ERR "ASoC: Platform device allocation failed\n"); - platform_device_put(at91sam9g20ek_snd_device); + goto err_device_add; } return ret; +err_device_add: + platform_device_put(at91sam9g20ek_snd_device); err_mclk: clk_put(mclk); mclk = NULL; From 14abca3dfc51c0a4f798183f131d63bfd6552bd4 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:12:30 +0800 Subject: [PATCH 125/179] ASoC: simone: fix resource leak in simone_init error path Fix the error path to properly free allocated resources. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Mika Westerberg <mika.westerberg@iki.fi> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/ep93xx/simone.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sound/soc/ep93xx/simone.c b/sound/soc/ep93xx/simone.c index 4b0d19913728..286817946c56 100644 --- a/sound/soc/ep93xx/simone.c +++ b/sound/soc/ep93xx/simone.c @@ -54,24 +54,26 @@ static int __init simone_init(void) ret = platform_device_add(simone_snd_ac97_device); if (ret) - goto fail; + goto fail1; simone_snd_device = platform_device_alloc("soc-audio", -1); if (!simone_snd_device) { ret = -ENOMEM; - goto fail; + goto fail2; } platform_set_drvdata(simone_snd_device, &snd_soc_simone); ret = platform_device_add(simone_snd_device); - if (ret) { - platform_device_put(simone_snd_device); - goto fail; - } + if (ret) + goto fail3; - return ret; + return 0; -fail: +fail3: + platform_device_put(simone_snd_device); +fail2: + platform_device_del(simone_snd_ac97_device); +fail1: platform_device_put(simone_snd_ac97_device); return ret; } From ac8f924af555573e29b126ac5cef4fdd122ae517 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:13:09 +0800 Subject: [PATCH 126/179] ASoC: imx-ssi: fix resource leak Fix imx_ssi_probe() error path and imx_ssi_remove() to properly free allocated resources. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Sascha Hauer <s.hauer@pengutronix.de> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/imx/imx-ssi.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sound/soc/imx/imx-ssi.c b/sound/soc/imx/imx-ssi.c index d2d98c75ee8a..390b6ffc2658 100644 --- a/sound/soc/imx/imx-ssi.c +++ b/sound/soc/imx/imx-ssi.c @@ -679,8 +679,11 @@ static int imx_ssi_probe(struct platform_device *pdev) } ssi->soc_platform_pdev_fiq = platform_device_alloc("imx-fiq-pcm-audio", pdev->id); - if (!ssi->soc_platform_pdev_fiq) + if (!ssi->soc_platform_pdev_fiq) { + ret = -ENOMEM; goto failed_pdev_fiq_alloc; + } + platform_set_drvdata(ssi->soc_platform_pdev_fiq, ssi); ret = platform_device_add(ssi->soc_platform_pdev_fiq); if (ret) { @@ -689,8 +692,11 @@ static int imx_ssi_probe(struct platform_device *pdev) } ssi->soc_platform_pdev = platform_device_alloc("imx-pcm-audio", pdev->id); - if (!ssi->soc_platform_pdev) + if (!ssi->soc_platform_pdev) { + ret = -ENOMEM; goto failed_pdev_alloc; + } + platform_set_drvdata(ssi->soc_platform_pdev, ssi); ret = platform_device_add(ssi->soc_platform_pdev); if (ret) { @@ -703,6 +709,7 @@ static int imx_ssi_probe(struct platform_device *pdev) failed_pdev_add: platform_device_put(ssi->soc_platform_pdev); failed_pdev_alloc: + platform_device_del(ssi->soc_platform_pdev_fiq); failed_pdev_fiq_add: platform_device_put(ssi->soc_platform_pdev_fiq); failed_pdev_fiq_alloc: @@ -726,8 +733,8 @@ static int __devexit imx_ssi_remove(struct platform_device *pdev) struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); struct imx_ssi *ssi = platform_get_drvdata(pdev); - platform_device_del(ssi->soc_platform_pdev); - platform_device_put(ssi->soc_platform_pdev); + platform_device_unregister(ssi->soc_platform_pdev); + platform_device_unregister(ssi->soc_platform_pdev_fiq); snd_soc_unregister_dai(&pdev->dev); From 09de9533348632fbbf32ce618f669882aa718817 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 15:14:03 +0800 Subject: [PATCH 127/179] ASoC: phycore-ac97: fix resource leak Fix imx_phycore_init() error path and imx_phycore_exit() to properly free allocated resources. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Sascha Hauer <s.hauer@pengutronix.de> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/imx/phycore-ac97.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/sound/soc/imx/phycore-ac97.c b/sound/soc/imx/phycore-ac97.c index 39f23734781a..9eabc28667e6 100644 --- a/sound/soc/imx/phycore-ac97.c +++ b/sound/soc/imx/phycore-ac97.c @@ -43,6 +43,7 @@ static struct snd_soc_card imx_phycore = { .num_links = ARRAY_SIZE(imx_phycore_dai_ac97), }; +static struct platform_device *imx_phycore_snd_ac97_device; static struct platform_device *imx_phycore_snd_device; static int __init imx_phycore_init(void) @@ -53,29 +54,42 @@ static int __init imx_phycore_init(void) /* return happy. We might run on a totally different machine */ return 0; - imx_phycore_snd_device = platform_device_alloc("soc-audio", -1); - if (!imx_phycore_snd_device) + imx_phycore_snd_ac97_device = platform_device_alloc("soc-audio", -1); + if (!imx_phycore_snd_ac97_device) return -ENOMEM; - platform_set_drvdata(imx_phycore_snd_device, &imx_phycore); - ret = platform_device_add(imx_phycore_snd_device); + platform_set_drvdata(imx_phycore_snd_ac97_device, &imx_phycore); + ret = platform_device_add(imx_phycore_snd_ac97_device); + if (ret) + goto fail1; imx_phycore_snd_device = platform_device_alloc("wm9712-codec", -1); - if (!imx_phycore_snd_device) - return -ENOMEM; + if (!imx_phycore_snd_device) { + ret = -ENOMEM; + goto fail2; + } ret = platform_device_add(imx_phycore_snd_device); if (ret) { printk(KERN_ERR "ASoC: Platform device allocation failed\n"); - platform_device_put(imx_phycore_snd_device); + goto fail3; } + return 0; + +fail3: + platform_device_put(imx_phycore_snd_device); +fail2: + platform_device_del(imx_phycore_snd_ac97_device); +fail1: + platform_device_put(imx_phycore_snd_ac97_device); return ret; } static void __exit imx_phycore_exit(void) { platform_device_unregister(imx_phycore_snd_device); + platform_device_unregister(imx_phycore_snd_ac97_device); } late_initcall(imx_phycore_init); From 8b6b30ab665d3bbb23180c39f6215e6f64516ed0 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Thu, 25 Nov 2010 11:33:14 +0800 Subject: [PATCH 128/179] ASoC: davinci-vcif - fix a memory leak Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/davinci/davinci-vcif.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c index fb4cc1edf339..9d2afccc3a2d 100644 --- a/sound/soc/davinci/davinci-vcif.c +++ b/sound/soc/davinci/davinci-vcif.c @@ -247,7 +247,10 @@ static int davinci_vcif_probe(struct platform_device *pdev) static int davinci_vcif_remove(struct platform_device *pdev) { + struct davinci_vcif_dev *davinci_vcif_dev = dev_get_drvdata(&pdev->dev); + snd_soc_unregister_dai(&pdev->dev); + kfree(davinci_vcif_dev); return 0; } From 5989f11ba11c72f98d57580b991418d9ff6a207d Mon Sep 17 00:00:00 2001 From: Raoul Bhatia <r.bhatia@ipax.at> Date: Thu, 25 Nov 2010 17:32:47 +0100 Subject: [PATCH 129/179] EDAC: Fix typos in Documentation/edac.txt Fix trivial typos in edac.txt Signed-off-by: Raoul Bhatia <r.bhatia@ipax.at> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> --- Documentation/edac.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 0b875e8da969..9ee774de57cd 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -196,7 +196,7 @@ csrow3. The representation of the above is reflected in the directory tree in EDAC's sysfs interface. Starting in directory /sys/devices/system/edac/mc each memory controller will be represented -by its own 'mcX' directory, where 'X" is the index of the MC. +by its own 'mcX' directory, where 'X' is the index of the MC. ..../edac/mc/ @@ -207,7 +207,7 @@ by its own 'mcX' directory, where 'X" is the index of the MC. .... Under each 'mcX' directory each 'csrowX' is again represented by a -'csrowX', where 'X" is the csrow index: +'csrowX', where 'X' is the csrow index: .../mc/mc0/ @@ -232,7 +232,7 @@ EDAC control and attribute files. In 'mcX' directories are EDAC control and attribute files for -this 'X" instance of the memory controllers: +this 'X' instance of the memory controllers: Counter reset control file: @@ -343,7 +343,7 @@ Sdram memory scrubbing rate: 'csrowX' DIRECTORIES In the 'csrowX' directories are EDAC control and attribute files for -this 'X" instance of csrow: +this 'X' instance of csrow: Total Uncorrectable Errors count attribute file: From fe99b55994f08d321cc5f621c3634b1de4961d01 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 22:40:59 +0800 Subject: [PATCH 130/179] ASoC: tlv320aic3x - fix variable may be used uninitialized warning If aic3x_read failed , val is used uninitialized. Fix it by initializing val to 0. This patch fixes below compile warning: sound/soc/codecs/tlv320aic3x.c: In function 'aic3x_get_gpio': sound/soc/codecs/tlv320aic3x.c:1183: warning: 'val' may be used uninitialized in this function sound/soc/codecs/tlv320aic3x.c: In function 'aic3x_headset_detected': sound/soc/codecs/tlv320aic3x.c:1211: warning: 'val' may be used uninitialized in this function sound/soc/codecs/tlv320aic3x.c: In function 'aic3x_button_pressed': sound/soc/codecs/tlv320aic3x.c:1219: warning: 'val' may be used uninitialized in this function Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/codecs/tlv320aic3x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index fc687790188b..77b8f9ae29be 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1176,7 +1176,7 @@ EXPORT_SYMBOL_GPL(aic3x_set_gpio); int aic3x_get_gpio(struct snd_soc_codec *codec, int gpio) { u8 reg = gpio ? AIC3X_GPIO2_REG : AIC3X_GPIO1_REG; - u8 val, bit = gpio ? 2: 1; + u8 val = 0, bit = gpio ? 2 : 1; aic3x_read(codec, reg, &val); return (val >> bit) & 1; @@ -1204,7 +1204,7 @@ EXPORT_SYMBOL_GPL(aic3x_set_headset_detection); int aic3x_headset_detected(struct snd_soc_codec *codec) { - u8 val; + u8 val = 0; aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val); return (val >> 4) & 1; } @@ -1212,7 +1212,7 @@ EXPORT_SYMBOL_GPL(aic3x_headset_detected); int aic3x_button_pressed(struct snd_soc_codec *codec) { - u8 val; + u8 val = 0; aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val); return (val >> 5) & 1; } From 25436180ee8bed6740f29d92c2030c759885c147 Mon Sep 17 00:00:00 2001 From: Axel Lin <axel.lin@gmail.com> Date: Wed, 24 Nov 2010 22:24:01 +0800 Subject: [PATCH 131/179] ASoC: Fix resource reclaim for osk5912 In current implementation, there are resources leak in the error path. This patch properly reclaims the allocated resources in the error path. Also adds a missing clk_put in osk_soc_exit. Signed-off-by: Axel Lin <axel.lin@gmail.com> Acked-by: Jarkko Nikula <jhnikula@gmail.com> Acked-by: Liam Girdwood <lrg@slimlogic.co.uk> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> --- sound/soc/omap/osk5912.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c index f0e662556428..65ae00e976ef 100644 --- a/sound/soc/omap/osk5912.c +++ b/sound/soc/omap/osk5912.c @@ -177,7 +177,8 @@ static int __init osk_soc_init(void) tlv320aic23_mclk = clk_get(dev, "mclk"); if (IS_ERR(tlv320aic23_mclk)) { printk(KERN_ERR "Could not get mclk clock\n"); - return -ENODEV; + err = PTR_ERR(tlv320aic23_mclk); + goto err2; } /* @@ -188,7 +189,7 @@ static int __init osk_soc_init(void) if (clk_set_rate(tlv320aic23_mclk, CODEC_CLOCK)) { printk(KERN_ERR "Cannot set MCLK for AIC23 CODEC\n"); err = -ECANCELED; - goto err1; + goto err3; } } @@ -196,9 +197,12 @@ static int __init osk_soc_init(void) (uint) clk_get_rate(tlv320aic23_mclk), CODEC_CLOCK); return 0; -err1: + +err3: clk_put(tlv320aic23_mclk); +err2: platform_device_del(osk_snd_device); +err1: platform_device_put(osk_snd_device); return err; @@ -207,6 +211,7 @@ static int __init osk_soc_init(void) static void __exit osk_soc_exit(void) { + clk_put(tlv320aic23_mclk); platform_device_unregister(osk_snd_device); } From 4917b284db8607e414c334317b7d15239854d39c Mon Sep 17 00:00:00 2001 From: Randy Dunlap <randy.dunlap@oracle.com> Date: Mon, 22 Nov 2010 12:48:34 -0800 Subject: [PATCH 132/179] dmar, x86: Use function stubs when CONFIG_INTR_REMAP is disabled The stubs for CONFIG_INTR_REMAP disabled need to be functions instead of values to eliminate build warnings. arch/x86/kernel/apic/apic.c: In function 'lapic_suspend': arch/x86/kernel/apic/apic.c:2060:3: warning: statement with no effect arch/x86/kernel/apic/apic.c: In function 'lapic_resume': arch/x86/kernel/apic/apic.c:2137:3: warning: statement with no effect Reported-and-Tested-by: Fabio Comolli <fabio.comolli@gmail.com> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> LKML-Reference: <20101122124834.74429004.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- include/linux/dmar.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index a7d9dc21391d..7b776d71d36d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -175,10 +175,21 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) return 0; } -#define enable_intr_remapping(mode) (-1) -#define disable_intr_remapping() (0) -#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) + +static inline int enable_intr_remapping(int eim) +{ + return -1; +} + +static inline void disable_intr_remapping(void) +{ +} + +static inline int reenable_intr_remapping(int eim) +{ + return 0; +} #endif /* Can't use the common MSI interrupt functions From dddd3379a619a4cb8247bfd3c94ca9ae3797aa2e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Wed, 24 Nov 2010 10:05:55 +0100 Subject: [PATCH 133/179] perf: Fix inherit vs. context rotation bug It was found that sometimes children of tasks with inherited events had one extra event. Eventually it turned out to be due to the list rotation no being exclusive with the list iteration in the inheritance code. Cure this by temporarily disabling the rotation while we inherit the events. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Cc: <stable@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- include/linux/perf_event.h | 1 + kernel/perf_event.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 40150f345982..142e3d6042c7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -850,6 +850,7 @@ struct perf_event_context { int nr_active; int is_active; int nr_stat; + int rotate_disable; atomic_t refcount; struct task_struct *task; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 671f6c8c8a32..f365dd8ef8b0 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1622,8 +1622,12 @@ static void rotate_ctx(struct perf_event_context *ctx) { raw_spin_lock(&ctx->lock); - /* Rotate the first entry last of non-pinned groups */ - list_rotate_left(&ctx->flexible_groups); + /* + * Rotate the first entry last of non-pinned groups. Rotation might be + * disabled by the inheritance code. + */ + if (!ctx->rotate_disable) + list_rotate_left(&ctx->flexible_groups); raw_spin_unlock(&ctx->lock); } @@ -6162,6 +6166,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) struct perf_event *event; struct task_struct *parent = current; int inherited_all = 1; + unsigned long flags; int ret = 0; child->perf_event_ctxp[ctxn] = NULL; @@ -6202,6 +6207,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) break; } + /* + * We can't hold ctx->lock when iterating the ->flexible_group list due + * to allocations, but we need to prevent rotation because + * rotate_ctx() will change the list from interrupt context. + */ + raw_spin_lock_irqsave(&parent_ctx->lock, flags); + parent_ctx->rotate_disable = 1; + raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); + list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); @@ -6209,6 +6223,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) break; } + raw_spin_lock_irqsave(&parent_ctx->lock, flags); + parent_ctx->rotate_disable = 0; + raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { From 33c6d6a7ad0ffab9b1b15f8e4107a2af072a05a0 Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Mon, 22 Nov 2010 16:55:23 -0500 Subject: [PATCH 134/179] x86, perf, nmi: Disable perf if counters are not accessible In a kvm virt guests, the perf counters are not emulated. Instead they return zero on a rdmsrl. The perf nmi handler uses the fact that crossing a zero means the counter overflowed (for those counters that do not have specific interrupt bits). Therefore on kvm guests, perf will swallow all NMIs thinking the counters overflowed. This causes problems for subsystems like kgdb which needs NMIs to do its magic. This problem was discovered by running kgdb tests. The solution is to write garbage into a perf counter during the initialization and hopefully reading back the same number. On kvm guests, the value will be read back as zero and we disable perf as a result. Reported-by: Jason Wessel <jason.wessel@windriver.com> Patch-inspired-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Don Zickus <dzickus@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> LKML-Reference: <1290462923-30734-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183efb..6d75b9145b13 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {} #endif +static bool check_hw_exists(void) +{ + u64 val, val_new = 0; + int ret = 0; + + val = 0xabcdUL; + ret |= checking_wrmsrl(x86_pmu.perfctr, val); + ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); + if (ret || val != val_new) + return false; + + return true; +} + static void reserve_ds_buffers(void); static void release_ds_buffers(void); @@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void) pmu_check_apic(); + /* sanity check that the hardware exists or is emulated */ + if (!check_hw_exists()) { + pr_cont("Broken PMU hardware detected, software events only.\n"); + return; + } + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.quirks) From cc2067a51424dd25c10c1b1230b4222d8baec94d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Tue, 16 Nov 2010 21:49:01 +0100 Subject: [PATCH 135/179] perf, x86: Fixup Kconfig deps This leads to a Kconfig dep inversion, x86 selects PERF_EVENT (due to a hw_breakpoint dep) but doesn't unconditionally provide HAVE_PERF_EVENT. (This can cause build failures on M386/M486 kernel .config's.) Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20101117222055.982965150@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e8327686d3c5..e330da21b84f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,7 +21,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE - select HAVE_PERF_EVENTS if (!M386 && !M486) + select HAVE_PERF_EVENTS select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES From ee6dcfa40a50fe12a3ae0fb4d2653c66c3ed6556 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Fri, 26 Nov 2010 13:49:04 +0100 Subject: [PATCH 136/179] perf: Fix the software context switch counter Stephane noticed that because the perf_sw_event() call is inside the perf_event_task_sched_out() call it won't get called unless we have a per-task counter. Reported-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- include/linux/perf_event.h | 29 +++++++++++++++-------------- kernel/perf_event.c | 2 -- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 142e3d6042c7..de2c41758e29 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -909,20 +909,6 @@ extern int perf_num_counters(void); extern const char *perf_pmu_name(void); extern void __perf_event_task_sched_in(struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); - -extern atomic_t perf_task_events; - -static inline void perf_event_task_sched_in(struct task_struct *task) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); -} - -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); -} - extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1031,6 +1017,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) __perf_sw_event(event_id, nr, nmi, regs, addr); } +extern atomic_t perf_task_events; + +static inline void perf_event_task_sched_in(struct task_struct *task) +{ + COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); +} + +static inline +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +{ + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); +} + extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f365dd8ef8b0..eac7e3364335 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1287,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); } From 5a8cfb4e8ae317d283f84122ed20faa069c5e0c4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Fri, 26 Nov 2010 17:11:18 +0100 Subject: [PATCH 137/179] ALSA: hda - Use ALC_INIT_DEFAULT for really default initialization When SKU assid gives no valid bits for 0x38, the driver didn't take any action, so far. This resulted in the missing initialization for external amps, etc, thus the silent output in the end. Especially users hit this problem on ALC888 newly since 2.6.35, where the driver doesn't force to use ALC_INIT_DEFAULT any more. This patch sets the default initialization scheme to use ALC_INIT_DEFAULT when no valid bits are set for SKU assid. Reference: https://bugzilla.redhat.com/show_bug.cgi?id=657388 Reported-and-tested-by: Kyle McMartin <kyle@redhat.com> Cc: <stable@kernel.org> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 81a2a49b862c..886d7c72936e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1614,6 +1614,7 @@ static int alc_subsystem_id(struct hda_codec *codec, spec->init_amp = ALC_INIT_GPIO3; break; case 5: + default: spec->init_amp = ALC_INIT_DEFAULT; break; } From 55a61d1d06a3dc443d0db8aaa613365dcb83b98a Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Mon, 22 Nov 2010 18:50:32 +0000 Subject: [PATCH 138/179] Btrfs: fix typo in fallocate to make it honor actual size There is a typo in __btrfs_prealloc_file_range() where we set the i_size to actual_len/cur_offset, and then just set it to cur_offset again, and do the same with btrfs_ordered_update_i_size(). This fixes it back to keeping i_size in a local variable and then updating i_size properly. Tested this with xfs_io -F -f -c "falloc 0 1" -c "pwrite 0 1" foo stat'ing foo gives us a size of 1 instead of 4096 like it was. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 37cc1776a5d7..0058fb3c2561 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7002,6 +7002,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 i_size; int ret = 0; bool own_trans = true; @@ -7043,11 +7044,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, (actual_len > inode->i_size) && (cur_offset > inode->i_size)) { if (cur_offset > actual_len) - i_size_write(inode, actual_len); + i_size = actual_len; else - i_size_write(inode, cur_offset); - i_size_write(inode, cur_offset); - btrfs_ordered_update_i_size(inode, cur_offset, NULL); + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); } ret = btrfs_update_inode(trans, root, inode); From 0ed42a63f3edb144b091d9528401fce95c3c4d8d Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Mon, 22 Nov 2010 18:55:39 +0000 Subject: [PATCH 139/179] Btrfs: make sure new inode size is ok in fallocate We have been failing xfstest 228 forever, because we don't check to make sure the new inode size is acceptable as far as RLIMIT is concerned. Just check to make sure it's ok to create a inode with this new size and error out if not. With this patch we now pass 228. Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0058fb3c2561..0eeacd93e8e5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7102,6 +7102,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, alloc_end); + if (ret) + goto out; + if (alloc_start > inode->i_size) { ret = btrfs_cont_expand(inode, alloc_start); if (ret) From bc1cbf1f86aa2501efa9ca637c736fce6bcc4b1d Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Tue, 23 Nov 2010 19:50:59 +0000 Subject: [PATCH 140/179] Btrfs: update inode ctime when using links Currently we fail xfstest 236 because we're not updating the inode ctime on link. This is a simple fix, and makes it so we pass 236 now. Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0eeacd93e8e5..6df921f218fb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4785,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return -EPERM; btrfs_inc_nlink(inode); + inode->i_ctime = CURRENT_TIME; err = btrfs_set_inode_index(dir, &index); if (err) From 619c8c763928841b1112e1d417f88bc1d44daecb Mon Sep 17 00:00:00 2001 From: Ian Kent <raven@themaw.net> Date: Mon, 22 Nov 2010 02:21:38 +0000 Subject: [PATCH 141/179] Btrfs - fix race between btrfs_get_sb() and umount When mounting a btrfs file system btrfs_test_super() may attempt to use sb->s_fs_info, the btrfs root, of a super block that is going away and that has had the btrfs root set to NULL in its ->put_super(). But if the super block is going away it cannot be an existing super block so we can return false in this case. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66e4612a7916..141fb317d3bc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -566,6 +566,12 @@ static int btrfs_test_super(struct super_block *s, void *data) struct btrfs_fs_devices *test_fs_devices = data; struct btrfs_root *root = btrfs_sb(s); + /* + * If this super block is going away, return false as it + * can't match as an existing super block. + */ + if (!atomic_read(&s->s_active)) + return 0; return root->fs_info->fs_devices == test_fs_devices; } From 975f84fee2e8a77ee5f41bfe7c5682bf29366b10 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Tue, 23 Nov 2010 19:36:57 +0000 Subject: [PATCH 142/179] Btrfs: fix fiemap There are two big problems currently with FIEMAP 1) We return extents for holes. This isn't supposed to happen, we just don't return extents for holes and then userspace interprets the lack of an extent as a hole. 2) We sometimes don't set FIEMAP_EXTENT_LAST properly. This is because we wait to see a EXTENT_FLAG_VACANCY flag on the em, but this won't happen if say we ask fiemap to map up to the last extent in a file, and there is nothing but holes up to the i_size. To fix this we need to lookup the last extent in this file and save the logical offset, so if we happen to try and map that extent we can be sure to set FIEMAP_EXTENT_LAST. With this patch we now pass xfstest 225, which we never have before. Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/extent_io.c | 63 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 143d3f541d64..5e7a94d7da89 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2901,21 +2901,53 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { - int ret; + int ret = 0; u64 off = start; u64 max = start + len; u32 flags = 0; + u32 found_type; + u64 last; u64 disko = 0; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_state *cached_state = NULL; + struct btrfs_path *path; + struct btrfs_file_extent_item *item; int end = 0; u64 em_start = 0, em_len = 0; unsigned long emflags; - ret = 0; + int hole = 0; if (len == 0) return -EINVAL; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, + path, inode->i_ino, -1, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + WARN_ON(!ret); + path->slots[0]--; + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + + /* No extents, just return */ + if (found_key.objectid != inode->i_ino || + found_type != BTRFS_EXTENT_DATA_KEY) { + btrfs_free_path(path); + return 0; + } + last = found_key.offset; + btrfs_free_path(path); + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, &cached_state, GFP_NOFS); em = get_extent(inode, NULL, 0, off, max - off, 0); @@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ret = PTR_ERR(em); goto out; } + while (!end) { + hole = 0; off = em->start + em->len; if (off >= max) end = 1; + if (em->block_start == EXTENT_MAP_HOLE) { + hole = 1; + goto next; + } + em_start = em->start; em_len = em->len; @@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (em->block_start == EXTENT_MAP_LAST_BYTE) { end = 1; flags |= FIEMAP_EXTENT_LAST; - } else if (em->block_start == EXTENT_MAP_HOLE) { - flags |= FIEMAP_EXTENT_UNWRITTEN; } else if (em->block_start == EXTENT_MAP_INLINE) { flags |= (FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED); @@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; +next: emflags = em->flags; free_extent_map(em); em = NULL; - if (!end) { em = get_extent(inode, NULL, 0, off, max - off, 0); if (!em) @@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } emflags = em->flags; } + if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { flags |= FIEMAP_EXTENT_LAST; end = 1; } - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); - if (ret) - goto out_free; + if (em_start == last) { + flags |= FIEMAP_EXTENT_LAST; + end = 1; + } + + if (!hole) { + ret = fiemap_fill_next_extent(fieinfo, em_start, disko, + em_len, flags); + if (ret) + goto out_free; + } } out_free: free_extent_map(em); From 450ba0ea06b6ed3612d27f2b7127a9de4160f285 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@redhat.com> Date: Fri, 19 Nov 2010 14:59:15 -0500 Subject: [PATCH 143/179] Btrfs: setup blank root and fs_info for mount time There is a problem with how we use sget, it searches through the list of supers attached to the fs_type looking for a super with the same fs_devices as what we're trying to mount. This depends on sb->s_fs_info being filled, but we don't fill that in until we get to btrfs_fill_super, so we could hit supers on the fs_type super list that have a null s_fs_info. In order to fix that we need to go ahead and setup a blank root with a blank fs_info to hold fs_devices, that way our test will work out right and then we can set s_fs_info in btrfs_set_super, and then open_ctree will simply use our pre-allocated root and fs_info when setting everything up. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/disk-io.c | 6 ++---- fs/btrfs/super.c | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a67b98d58c2a..57c9d8eeb7dc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1561,10 +1561,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); - struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), - GFP_NOFS); + struct btrfs_root *tree_root = btrfs_sb(sb); + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 141fb317d3bc..47bf67cbe6bf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -563,7 +563,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) static int btrfs_test_super(struct super_block *s, void *data) { - struct btrfs_fs_devices *test_fs_devices = data; + struct btrfs_root *test_root = data; struct btrfs_root *root = btrfs_sb(s); /* @@ -572,9 +572,17 @@ static int btrfs_test_super(struct super_block *s, void *data) */ if (!atomic_read(&s->s_active)) return 0; - return root->fs_info->fs_devices == test_fs_devices; + return root->fs_info->fs_devices == test_root->fs_info->fs_devices; } +static int btrfs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + + return set_anon_super(s, data); +} + + /* * Find a superblock for the given device / mount point. * @@ -588,6 +596,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; + struct btrfs_root *tree_root = NULL; + struct btrfs_fs_info *fs_info = NULL; fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; @@ -615,8 +625,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_close_devices; } + /* + * Setup a dummy root and fs_info for test/set super. This is because + * we don't actually fill this stuff out until open_ctree, but we need + * it for searching for existing supers, so this lets us do that and + * then open_ctree will properly initialize everything later. + */ + fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); + tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + if (!fs_info || !tree_root) { + error = -ENOMEM; + goto error_close_devices; + } + fs_info->tree_root = tree_root; + fs_info->fs_devices = fs_devices; + tree_root->fs_info = fs_info; + bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); if (IS_ERR(s)) goto error_s; @@ -685,6 +711,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, error = PTR_ERR(s); error_close_devices: btrfs_close_devices(fs_devices); + kfree(fs_info); + kfree(tree_root); error_free_subvol_name: kfree(subvol_name); return error; From 4cb6a614ba0e58cae8abdadbf73bcb4d37a3f599 Mon Sep 17 00:00:00 2001 From: Tracey Dent <tdent48227@gmail.com> Date: Sun, 21 Nov 2010 15:23:50 +0000 Subject: [PATCH 144/179] Net: ceph: Makefile: Remove unnessary code Remove the if and else conditional because the code is in mainline and there is no need in it being there. Signed-off-by: Tracey Dent <tdent48227@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ceph/Makefile | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/net/ceph/Makefile b/net/ceph/Makefile index aab1cabb8035..5f19415ec9c0 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -1,9 +1,6 @@ # # Makefile for CEPH filesystem. # - -ifneq ($(KERNELRELEASE),) - obj-$(CONFIG_CEPH_LIB) += libceph.o libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ @@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ pagevec.o -else -#Otherwise we were called directly from the command -# line; invoke the kernel build system. - -KERNELDIR ?= /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) - -default: all - -all: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install - -clean: - $(MAKE) -C $(KERNELDIR) M=$(PWD) clean - -endif From 8475ef9fd16cadbfc692f78e608d1941a340beb2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov <xemul@parallels.com> Date: Mon, 22 Nov 2010 03:26:12 +0000 Subject: [PATCH 145/179] netns: Don't leak others' openreq-s in proc The /proc/net/tcp leaks openreq sockets from other namespaces. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/tcp_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69ccbc1dde9c..e13da6de1fc7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2043,7 +2043,9 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } get_sk: sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { cur = sk; goto out; } From 0147fc058d11bd4009b126d09974d2c8f48fef15 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan <adobriyan@gmail.com> Date: Mon, 22 Nov 2010 12:54:21 +0000 Subject: [PATCH 146/179] tcp: restrict net.ipv4.tcp_adv_win_scale (#20312) tcp_win_from_space() does the following: if (sysctl_tcp_adv_win_scale <= 0) return space >> (-sysctl_tcp_adv_win_scale); else return space - (space >> sysctl_tcp_adv_win_scale); "space" is int. As per C99 6.5.7 (3) shifting int for 32 or more bits is undefined behaviour. Indeed, if sysctl_tcp_adv_win_scale is exactly 32, space >> 32 equals space and function returns 0. Which means we busyloop in tcp_fixup_rcvbuf(). Restrict net.ipv4.tcp_adv_win_scale to [-31, 31]. Fix https://bugzilla.kernel.org/show_bug.cgi?id=20312 Steps to reproduce: echo 32 >/proc/sys/net/ipv4/tcp_adv_win_scale wget www.kernel.org [softlockup] Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- Documentation/networking/ip-sysctl.txt | 1 + net/ipv4/sysctl_net_ipv4.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fe95105992c5..3c5e465296e1 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -144,6 +144,7 @@ tcp_adv_win_scale - INTEGER Count buffering overhead as bytes/2^tcp_adv_win_scale (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), if it is <= 0. + Possible values are [-31, 31], inclusive. Default: 2 tcp_allowed_congestion_control - STRING diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e91911d7aae2..1b4ec21497a4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,6 +26,8 @@ static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; +static int tcp_adv_win_scale_min = -31; +static int tcp_adv_win_scale_max = 31; /* Update system visible IP port range */ static void set_local_port_range(int range[2]) @@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, }, { .procname = "tcp_tw_reuse", From 0ac78870220b6e0ac74dd9292bcfa7b18718babd Mon Sep 17 00:00:00 2001 From: Gerrit Renker <gerrit@erg.abdn.ac.uk> Date: Tue, 23 Nov 2010 02:36:56 +0000 Subject: [PATCH 147/179] dccp: fix error in updating the GAR This fixes a bug in updating the Greatest Acknowledgment number Received (GAR): the current implementation does not track the greatest received value - lower values in the range AWL..AWH (RFC 4340, 7.5.1) erase higher ones. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/dccp/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 265985370fa1..e424a09e83f6 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + ackno != DCCP_PKT_WITHOUT_ACK_SEQ && + after48(ackno, dp->dccps_gar)) dp->dccps_gar = ackno; } else { unsigned long now = jiffies; From 462ca99c2ff6caae94dde5c05b56b54f6c01602a Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger <wg@grandegger.com> Date: Tue, 23 Nov 2010 06:40:25 +0000 Subject: [PATCH 148/179] au1000_eth: fix invalid address accessing the MAC enable register "aup->enable" holds already the address pointing to the MAC enable register. The bug was introduced by commit d0e7cb: "au1000-eth: remove volatiles, switch to I/O accessors". CC: Florian Fainelli <florian@openwrt.org> Signed-off-by: Wolfgang Grandegger <wg@denx.de> Acked-by: Florian Fainelli <florian@openwrt.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/au1000_eth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 43489f89c142..53eff9ba6e95 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -155,10 +155,10 @@ static void au1000_enable_mac(struct net_device *dev, int force_reset) spin_lock_irqsave(&aup->lock, flags); if (force_reset || (!aup->mac_enabled)) { - writel(MAC_EN_CLOCK_ENABLE, &aup->enable); + writel(MAC_EN_CLOCK_ENABLE, aup->enable); au_sync_delay(2); writel((MAC_EN_RESET0 | MAC_EN_RESET1 | MAC_EN_RESET2 - | MAC_EN_CLOCK_ENABLE), &aup->enable); + | MAC_EN_CLOCK_ENABLE), aup->enable); au_sync_delay(2); aup->mac_enabled = 1; @@ -503,9 +503,9 @@ static void au1000_reset_mac_unlocked(struct net_device *dev) au1000_hard_stop(dev); - writel(MAC_EN_CLOCK_ENABLE, &aup->enable); + writel(MAC_EN_CLOCK_ENABLE, aup->enable); au_sync_delay(2); - writel(0, &aup->enable); + writel(0, aup->enable); au_sync_delay(2); aup->tx_full = 0; @@ -1119,7 +1119,7 @@ static int __devinit au1000_probe(struct platform_device *pdev) /* set a random MAC now in case platform_data doesn't provide one */ random_ether_addr(dev->dev_addr); - writel(0, &aup->enable); + writel(0, aup->enable); aup->mac_enabled = 0; pd = pdev->dev.platform_data; From 3c6f27bf33052ea6ba9d82369fb460726fb779c0 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg <drosenberg@vsecurity.com> Date: Tue, 23 Nov 2010 11:02:13 +0000 Subject: [PATCH 149/179] DECnet: don't leak uninitialized stack byte A single uninitialized padding byte is leaked to userspace. Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com> CC: stable <stable@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/decnet/af_decnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a76b78de679f..6f97268ed85f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if (r_len > sizeof(struct linkinfo_dn)) r_len = sizeof(struct linkinfo_dn); + memset(&link, 0, sizeof(link)); + switch(sock->state) { case SS_CONNECTING: link.idn_linkstate = LL_CONNECTING; From bcc70bb3aeae7c3d035881d41055685f08a2b745 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov <gorcunov@gmail.com> Date: Tue, 23 Nov 2010 11:43:44 +0000 Subject: [PATCH 150/179] net, ppp: Report correct error code if unit allocation failed Allocating unit from ird might return several error codes not only -EAGAIN, so it should not be changed and returned precisely. Same time unit release procedure should be invoked only if device is unregistering. Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> CC: Paul Mackerras <paulus@samba.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ppp_generic.c | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 09cf56d0416a..39659976a1ac 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2584,16 +2584,16 @@ ppp_create_interface(struct net *net, int unit, int *retp) */ dev_net_set(dev, net); - ret = -EEXIST; mutex_lock(&pn->all_ppp_mutex); if (unit < 0) { unit = unit_get(&pn->units_idr, ppp); if (unit < 0) { - *retp = unit; + ret = unit; goto out2; } } else { + ret = -EEXIST; if (unit_find(&pn->units_idr, unit)) goto out2; /* unit already exists */ /* @@ -2668,10 +2668,10 @@ static void ppp_shutdown_interface(struct ppp *ppp) ppp->closing = 1; ppp_unlock(ppp); unregister_netdev(ppp->dev); + unit_put(&pn->units_idr, ppp->file.index); } else ppp_unlock(ppp); - unit_put(&pn->units_idr, ppp->file.index); ppp->file.dead = 1; ppp->owner = NULL; wake_up_interruptible(&ppp->file.rwait); @@ -2859,8 +2859,7 @@ static void __exit ppp_cleanup(void) * by holding all_ppp_mutex */ -/* associate pointer with specified number */ -static int unit_set(struct idr *p, void *ptr, int n) +static int __unit_alloc(struct idr *p, void *ptr, int n) { int unit, err; @@ -2871,10 +2870,24 @@ static int unit_set(struct idr *p, void *ptr, int n) } err = idr_get_new_above(p, ptr, n, &unit); - if (err == -EAGAIN) - goto again; + if (err < 0) { + if (err == -EAGAIN) + goto again; + return err; + } - if (unit != n) { + return unit; +} + +/* associate pointer with specified number */ +static int unit_set(struct idr *p, void *ptr, int n) +{ + int unit; + + unit = __unit_alloc(p, ptr, n); + if (unit < 0) + return unit; + else if (unit != n) { idr_remove(p, unit); return -EINVAL; } @@ -2885,19 +2898,7 @@ static int unit_set(struct idr *p, void *ptr, int n) /* get new free unit number and associate pointer with it */ static int unit_get(struct idr *p, void *ptr) { - int unit, err; - -again: - if (!idr_pre_get(p, GFP_KERNEL)) { - printk(KERN_ERR "PPP: No free memory for idr\n"); - return -ENOMEM; - } - - err = idr_get_new_above(p, ptr, 0, &unit); - if (err == -EAGAIN) - goto again; - - return unit; + return __unit_alloc(p, ptr, 0); } /* put unit number back to a pool */ From 42eb59d3a80ff83b4cacb92dcc44b22da7d4969b Mon Sep 17 00:00:00 2001 From: Casey Leedom <leedom@chelsio.com> Date: Wed, 24 Nov 2010 12:23:57 +0000 Subject: [PATCH 151/179] cxgb4vf: fix setting unicast/multicast addresses ... We were truncating the number of unicast and multicast MAC addresses supported. Additionally, we were incorrectly computing the MAC Address hash (a "1 << N" where we needed a "1ULL << N"). Signed-off-by: Casey Leedom <leedom@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/cxgb4vf/cxgb4vf_main.c | 73 ++++++++++++++--------- drivers/net/cxgb4vf/t4vf_hw.c | 94 +++++++++++++++++++----------- 2 files changed, 104 insertions(+), 63 deletions(-) diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c index c3449bbc585a..d887a76cd39d 100644 --- a/drivers/net/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/cxgb4vf/cxgb4vf_main.c @@ -816,40 +816,48 @@ static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) } /* - * Collect up to maxaddrs worth of a netdevice's unicast addresses into an - * array of addrss pointers and return the number collected. + * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting + * at a specified offset within the list, into an array of addrss pointers and + * return the number collected. */ -static inline int collect_netdev_uc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int maxaddrs) +static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev, + const u8 **addr, + unsigned int offset, + unsigned int maxaddrs) { + unsigned int index = 0; unsigned int naddr = 0; const struct netdev_hw_addr *ha; - for_each_dev_addr(dev, ha) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } + for_each_dev_addr(dev, ha) + if (index++ >= offset) { + addr[naddr++] = ha->addr; + if (naddr >= maxaddrs) + break; + } return naddr; } /* - * Collect up to maxaddrs worth of a netdevice's multicast addresses into an - * array of addrss pointers and return the number collected. + * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting + * at a specified offset within the list, into an array of addrss pointers and + * return the number collected. */ -static inline int collect_netdev_mc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int maxaddrs) +static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev, + const u8 **addr, + unsigned int offset, + unsigned int maxaddrs) { + unsigned int index = 0; unsigned int naddr = 0; const struct netdev_hw_addr *ha; - netdev_for_each_mc_addr(ha, dev) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } + netdev_for_each_mc_addr(ha, dev) + if (index++ >= offset) { + addr[naddr++] = ha->addr; + if (naddr >= maxaddrs) + break; + } return naddr; } @@ -862,16 +870,20 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) u64 mhash = 0; u64 uhash = 0; bool free = true; - u16 filt_idx[7]; + unsigned int offset, naddr; const u8 *addr[7]; - int ret, naddr = 0; + int ret; const struct port_info *pi = netdev_priv(dev); /* first do the secondary unicast addresses */ - naddr = collect_netdev_uc_list_addrs(dev, addr, ARRAY_SIZE(addr)); - if (naddr > 0) { + for (offset = 0; ; offset += naddr) { + naddr = collect_netdev_uc_list_addrs(dev, addr, offset, + ARRAY_SIZE(addr)); + if (naddr == 0) + break; + ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, filt_idx, &uhash, sleep); + naddr, addr, NULL, &uhash, sleep); if (ret < 0) return ret; @@ -879,12 +891,17 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) } /* next set up the multicast addresses */ - naddr = collect_netdev_mc_list_addrs(dev, addr, ARRAY_SIZE(addr)); - if (naddr > 0) { + for (offset = 0; ; offset += naddr) { + naddr = collect_netdev_mc_list_addrs(dev, addr, offset, + ARRAY_SIZE(addr)); + if (naddr == 0) + break; + ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, filt_idx, &mhash, sleep); + naddr, addr, NULL, &mhash, sleep); if (ret < 0) return ret; + free = false; } return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0, diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c index e306c20dfaee..19520afe1a12 100644 --- a/drivers/net/cxgb4vf/t4vf_hw.c +++ b/drivers/net/cxgb4vf/t4vf_hw.c @@ -1014,48 +1014,72 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok) { - int i, ret; + int offset, ret = 0; + unsigned nfilters = 0; + unsigned int rem = naddr; struct fw_vi_mac_cmd cmd, rpl; - struct fw_vi_mac_exact *p; - size_t len16; - if (naddr > ARRAY_SIZE(cmd.u.exact)) + if (naddr > FW_CLS_TCAM_NUM_ENTRIES) return -EINVAL; - len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, - u.exact[naddr]), 16); - memset(&cmd, 0, sizeof(cmd)); - cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) | - FW_CMD_REQUEST | - FW_CMD_WRITE | - (free ? FW_CMD_EXEC : 0) | - FW_VI_MAC_CMD_VIID(viid)); - cmd.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) | - FW_CMD_LEN16(len16)); + for (offset = 0; offset < naddr; /**/) { + unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact) + ? rem + : ARRAY_SIZE(cmd.u.exact)); + size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, + u.exact[fw_naddr]), 16); + struct fw_vi_mac_exact *p; + int i; - for (i = 0, p = cmd.u.exact; i < naddr; i++, p++) { - p->valid_to_idx = - cpu_to_be16(FW_VI_MAC_CMD_VALID | - FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC)); - memcpy(p->macaddr, addr[i], sizeof(p->macaddr)); + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) | + FW_CMD_REQUEST | + FW_CMD_WRITE | + (free ? FW_CMD_EXEC : 0) | + FW_VI_MAC_CMD_VIID(viid)); + cmd.freemacs_to_len16 = + cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) | + FW_CMD_LEN16(len16)); + + for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) { + p->valid_to_idx = cpu_to_be16( + FW_VI_MAC_CMD_VALID | + FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC)); + memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr)); + } + + + ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl, + sleep_ok); + if (ret && ret != -ENOMEM) + break; + + for (i = 0, p = rpl.u.exact; i < fw_naddr; i++, p++) { + u16 index = FW_VI_MAC_CMD_IDX_GET( + be16_to_cpu(p->valid_to_idx)); + + if (idx) + idx[offset+i] = + (index >= FW_CLS_TCAM_NUM_ENTRIES + ? 0xffff + : index); + if (index < FW_CLS_TCAM_NUM_ENTRIES) + nfilters++; + else if (hash) + *hash |= (1ULL << hash_mac_addr(addr[offset+i])); + } + + free = false; + offset += fw_naddr; + rem -= fw_naddr; } - ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl, sleep_ok); - if (ret) - return ret; - - for (i = 0, p = rpl.u.exact; i < naddr; i++, p++) { - u16 index = FW_VI_MAC_CMD_IDX_GET(be16_to_cpu(p->valid_to_idx)); - - if (idx) - idx[i] = (index >= FW_CLS_TCAM_NUM_ENTRIES - ? 0xffff - : index); - if (index < FW_CLS_TCAM_NUM_ENTRIES) - ret++; - else if (hash) - *hash |= (1 << hash_mac_addr(addr[i])); - } + /* + * If there were no errors or we merely ran out of room in our MAC + * address arena, return the number of filters actually written. + */ + if (ret == 0 || ret == -ENOMEM) + ret = nfilters; return ret; } From 03fe5f3ef7eab88e1405baa52a7923fbf337230b Mon Sep 17 00:00:00 2001 From: Jiri Slaby <jslaby@suse.cz> Date: Wed, 24 Nov 2010 13:54:54 +0000 Subject: [PATCH 152/179] NET: wan/x25_asy, move lapb_unregister to x25_asy_close_tty We register lapb when tty is created, but unregister it only when the device is UP. So move the lapb_unregister to x25_asy_close_tty after the device is down. The old behaviour causes ldisc switching to fail each second attempt, because we noted for us that the device is unused, so we use it the second time, but labp layer still have it registered, so it fails obviously. Signed-off-by: Jiri Slaby <jslaby@suse.cz> Reported-by: Sergey Lapin <slapin@ossfans.org> Cc: Andrew Hendry <andrew.hendry@gmail.com> Tested-by: Sergey Lapin <slapin@ossfans.org> Tested-by: Mikhail Ulyanov <ulyanov.mikhail@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/wan/x25_asy.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index d81ad8397885..cf05504d9511 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -498,7 +498,6 @@ static int x25_asy_open(struct net_device *dev) static int x25_asy_close(struct net_device *dev) { struct x25_asy *sl = netdev_priv(dev); - int err; spin_lock(&sl->lock); if (sl->tty) @@ -507,10 +506,6 @@ static int x25_asy_close(struct net_device *dev) netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; - err = lapb_unregister(dev); - if (err != LAPB_OK) - printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n", - err); spin_unlock(&sl->lock); return 0; } @@ -595,6 +590,7 @@ static int x25_asy_open_tty(struct tty_struct *tty) static void x25_asy_close_tty(struct tty_struct *tty) { struct x25_asy *sl = tty->disc_data; + int err; /* First make sure we're connected. */ if (!sl || sl->magic != X25_ASY_MAGIC) @@ -605,6 +601,11 @@ static void x25_asy_close_tty(struct tty_struct *tty) dev_close(sl->dev); rtnl_unlock(); + err = lapb_unregister(sl->dev); + if (err != LAPB_OK) + printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n", + err); + tty->disc_data = NULL; sl->tty = NULL; x25_asy_free(sl); From 8e65c0ece6f2aa732f9b755331869c67aeb1c7f6 Mon Sep 17 00:00:00 2001 From: Filip Aben <f.aben@option.com> Date: Thu, 25 Nov 2010 03:40:50 +0000 Subject: [PATCH 153/179] hso: fix disable_net The HSO driver incorrectly creates a serial device instead of a net device when disable_net is set. It shouldn't create anything for the network interface. Signed-off-by: Filip Aben <f.aben@option.com> Reported-by: Piotr Isajew <pki@ex.com.pl> Reported-by: Johan Hovold <jhovold@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/usb/hso.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index b154a94de03e..62e9e8dc8190 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2994,12 +2994,14 @@ static int hso_probe(struct usb_interface *interface, case HSO_INTF_BULK: /* It's a regular bulk interface */ - if (((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) && - !disable_net) - hso_dev = hso_create_net_device(interface, port_spec); - else + if ((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) { + if (!disable_net) + hso_dev = + hso_create_net_device(interface, port_spec); + } else { hso_dev = hso_create_bulk_serial_device(interface, port_spec); + } if (!hso_dev) goto exit; break; From 71993e62a47dabddf10302807d6aa260455503f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 28 Nov 2010 13:56:09 -0800 Subject: [PATCH 154/179] Rename 'pipe_info()' to 'get_pipe_info()' .. and change it to take the 'file' pointer instead of an inode, since that's what all users want anyway. The renaming is preparatory to exporting it to other users. The old 'pipe_info()' name was too generic and is already used elsewhere, so before making the function public we need to use a more specific name. Cc: Jens Axboe <jaxboe@fusionio.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Dave Jones <davej@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/splice.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 8f1dfaecc8f0..0d92dabcc576 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1316,12 +1316,11 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, * location, so checking ->i_pipe is not enough to verify that this is a * pipe. */ -static inline struct pipe_inode_info *pipe_info(struct inode *inode) +static inline struct pipe_inode_info *get_pipe_info(struct file *file) { - if (S_ISFIFO(inode->i_mode)) - return inode->i_pipe; + struct inode *i = file->f_path.dentry->d_inode; - return NULL; + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; } /* @@ -1336,8 +1335,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, loff_t offset, *off; long ret; - ipipe = pipe_info(in->f_path.dentry->d_inode); - opipe = pipe_info(out->f_path.dentry->d_inode); + ipipe = get_pipe_info(in); + opipe = get_pipe_info(out); if (ipipe && opipe) { if (off_in || off_out) @@ -1555,7 +1554,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, int error; long ret; - pipe = pipe_info(file->f_path.dentry->d_inode); + pipe = get_pipe_info(file); if (!pipe) return -EBADF; @@ -1642,7 +1641,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, }; long ret; - pipe = pipe_info(file->f_path.dentry->d_inode); + pipe = get_pipe_info(file); if (!pipe) return -EBADF; @@ -2022,8 +2021,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, static long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); - struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); + struct pipe_inode_info *ipipe = get_pipe_info(in); + struct pipe_inode_info *opipe = get_pipe_info(out); int ret = -EINVAL; /* From c66fb347946ebdd5b10908866ecc9fa05ee2cf3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 28 Nov 2010 14:09:57 -0800 Subject: [PATCH 155/179] Export 'get_pipe_info()' to other users And in particular, use it in 'pipe_fcntl()'. The other pipe functions do not need to use the 'careful' version, since they are only ever called for things that are already known to be pipes. The normal read/write/ioctl functions are called through the file operations structures, so if a file isn't a pipe, they'd never get called. But pipe_fcntl() is special, and called directly from the generic fcntl code, and needs to use the same careful function that the splice code is using. Cc: Jens Axboe <jaxboe@fusionio.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Dave Jones <davej@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/pipe.c | 2 +- fs/splice.c | 11 ----------- include/linux/pipe_fs_i.h | 12 ++++++++++++ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index a8012a955720..b8997f8c6324 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1204,7 +1204,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) struct pipe_inode_info *pipe; long ret; - pipe = file->f_path.dentry->d_inode->i_pipe; + pipe = get_pipe_info(file); if (!pipe) return -EBADF; diff --git a/fs/splice.c b/fs/splice.c index 0d92dabcc576..ce2f02579e35 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1311,17 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} /* * Determine where to splice to/from. diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 445796945ac9..3c5ac3147428 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -161,4 +161,16 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +static inline struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + #endif From 72083646528d4887b920deb71b37e09bc7d227bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 28 Nov 2010 16:27:19 -0800 Subject: [PATCH 156/179] Un-inline get_pipe_info() helper function This avoids some include-file hell, and the function isn't really important enough to be inlined anyway. Reported-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/pipe.c | 12 ++++++++++++ include/linux/pipe_fs_i.h | 13 +------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index b8997f8c6324..04629f36e397 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1199,6 +1199,18 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, return ret; } +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3c5ac3147428..bb27d7ec2fb9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -160,17 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); - -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} +struct pipe_inode_info *get_pipe_info(struct file *file); #endif From 163cf09c2a0ee5cac6285f9347975bd1e97725da Mon Sep 17 00:00:00 2001 From: Chris Mason <chris.mason@oracle.com> Date: Sun, 28 Nov 2010 19:56:33 -0500 Subject: [PATCH 157/179] Btrfs: deal with DIO bios that span more than one ordered extent The new DIO bio splitting code has problems when the bio spans more than one ordered extent. This will happen as the generic DIO code merges our get_blocks calls together into a bigger single bio. This fixes things by walking forward in the ordered extent code finding all the overlapping ordered extents and completing them all at once. Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/inode.c | 23 +++++++++++--- fs/btrfs/ordered-data.c | 67 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 3 ++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6df921f218fb..0f34cae0a633 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5602,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered = NULL; struct extent_state *cached_state = NULL; + u64 ordered_offset = dip->logical_offset; + u64 ordered_bytes = dip->bytes; int ret; if (err) goto out_done; - - ret = btrfs_dec_test_ordered_pending(inode, &ordered, - dip->logical_offset, dip->bytes); +again: + ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, + &ordered_offset, + ordered_bytes); if (!ret) - goto out_done; + goto out_test; BUG_ON(!ordered); @@ -5670,8 +5673,20 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) out: btrfs_delalloc_release_metadata(inode, ordered->len); btrfs_end_transaction(trans, root); + ordered_offset = ordered->file_offset + ordered->len; btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); + +out_test: + /* + * our bio might span multiple ordered extents. If we haven't + * completed the accounting for the whole dio, go back and try again + */ + if (ordered_offset < dip->logical_offset + dip->bytes) { + ordered_bytes = dip->logical_offset + dip->bytes - + ordered_offset; + goto again; + } out_done: bio->bi_private = dip->private; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f4621f6deca1..ae7737e352c9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -248,6 +248,73 @@ int btrfs_add_ordered_sum(struct inode *inode, return 0; } +/* + * this is used to account for finished IO across a given range + * of the file. The IO may span ordered extents. If + * a given ordered_extent is completely done, 1 is returned, otherwise + * 0. + * + * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used + * to make sure this function only returns 1 once for a given ordered extent. + * + * file_offset is updated to one byte past the range that is recorded as + * complete. This allows you to walk forward in the file. + */ +int btrfs_dec_test_first_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent **cached, + u64 *file_offset, u64 io_size) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + int ret; + u64 dec_end; + u64 dec_start; + u64 to_dec; + + tree = &BTRFS_I(inode)->ordered_tree; + spin_lock(&tree->lock); + node = tree_search(tree, *file_offset); + if (!node) { + ret = 1; + goto out; + } + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, *file_offset)) { + ret = 1; + goto out; + } + + dec_start = max(*file_offset, entry->file_offset); + dec_end = min(*file_offset + io_size, entry->file_offset + + entry->len); + *file_offset = dec_end; + if (dec_start > dec_end) { + printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", + (unsigned long long)dec_start, + (unsigned long long)dec_end); + } + to_dec = dec_end - dec_start; + if (to_dec > entry->bytes_left) { + printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", + (unsigned long long)entry->bytes_left, + (unsigned long long)to_dec); + } + entry->bytes_left -= to_dec; + if (entry->bytes_left == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); + else + ret = 1; +out: + if (!ret && cached && entry) { + *cached = entry; + atomic_inc(&entry->refs); + } + spin_unlock(&tree->lock); + return ret == 0; +} + /* * this is used to account for finished IO across a given range * of the file. The IO should not span ordered extents. If diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ac365492a3f..61dca83119dd 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, u64 file_offset, u64 io_size); +int btrfs_dec_test_first_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent **cached, + u64 *file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, From 5c7e57f7cddb83d81d83fefa5822dfe80891130e Mon Sep 17 00:00:00 2001 From: Breno Leitao <breno@cafe.(none)> Date: Fri, 26 Nov 2010 07:26:27 +0000 Subject: [PATCH 158/179] ehea: Add some info messages and fix an issue This patch adds some debug information about ehea not being able to allocate enough spaces. Also it correctly updates the amount of available skb. Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ehea/ehea_main.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 182b2a7be8dc..3d0af08483a1 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -400,6 +400,7 @@ static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes) skb_arr_rq1[index] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); if (!skb_arr_rq1[index]) { + ehea_info("Unable to allocate enough skb in the array\n"); pr->rq1_skba.os_skbs = fill_wqes - i; break; } @@ -422,13 +423,20 @@ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a) struct net_device *dev = pr->port->netdev; int i; - for (i = 0; i < pr->rq1_skba.len; i++) { + if (nr_rq1a > pr->rq1_skba.len) { + ehea_error("NR_RQ1A bigger than skb array len\n"); + return; + } + + for (i = 0; i < nr_rq1a; i++) { skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb_arr_rq1[i]) + if (!skb_arr_rq1[i]) { + ehea_info("No enough memory to allocate skb array\n"); break; + } } /* Ring doorbell */ - ehea_update_rq1a(pr->qp, nr_rq1a); + ehea_update_rq1a(pr->qp, i); } static int ehea_refill_rq_def(struct ehea_port_res *pr, @@ -735,8 +743,10 @@ static int ehea_proc_rwqes(struct net_device *dev, skb = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb) + if (!skb) { + ehea_info("Not enough memory to allocate skb\n"); break; + } } skb_copy_to_linear_data(skb, ((char *)cqe) + 64, cqe->num_bytes_transfered - 4); From b4ff3c90e6066bacc8a92111752fe9e4f4c45cca Mon Sep 17 00:00:00 2001 From: Nagendra Tomar <tomer_iisc@yahoo.com> Date: Fri, 26 Nov 2010 14:26:27 +0000 Subject: [PATCH 159/179] inet: Fix __inet_inherit_port() to correctly increment bsockets and num_owners inet sockets corresponding to passive connections are added to the bind hash using ___inet_inherit_port(). These sockets are later removed from the bind hash using __inet_put_port(). These two functions are not exactly symmetrical. __inet_put_port() decrements hashinfo->bsockets and tb->num_owners, whereas ___inet_inherit_port() does not increment them. This results in both of these going to -ve values. This patch fixes this by calling inet_bind_hash() from ___inet_inherit_port(), which does the right thing. 'bsockets' and 'num_owners' were introduced by commit a9d8f9110d7e953c (inet: Allowing more than 64k connections and heavily optimize bind(0)) Signed-off-by: Nagendra Singh Tomar <tomer_iisc@yahoo.com> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Evgeniy Polyakov <zbr@ioremap.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/inet_hashtables.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b463..3c0369a3a663 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) } } } - sk_add_bind_node(child, &tb->owners); - inet_csk(child)->icsk_bind_hash = tb; + inet_bind_hash(child, tb, port); spin_unlock(&head->lock); return 0; From d830418e4085d65b3f8bad3216a37bc986ecd17d Mon Sep 17 00:00:00 2001 From: Yang Li <leoli@freescale.com> Date: Thu, 25 Nov 2010 23:29:58 +0000 Subject: [PATCH 160/179] ucc_geth: fix ucc halt problem in half duplex mode In commit 58933c64(ucc_geth: Fix the wrong the Rx/Tx FIFO size), the UCC_GETH_UTFTT_INIT is set to 512 based on the recommendation of the QE Reference Manual. But that will sometimes cause tx halt while working in half duplex mode. According to errata draft QE_GENERAL-A003(High Tx Virtual FIFO threshold size can cause UCC to halt), setting UTFTT less than [(UTFS x (M - 8)/M) - 128] will prevent this from happening (M is the minimum buffer size). The patch changes UTFTT back to 256. Signed-off-by: Li Yang <leoli@freescale.com> Cc: Jean-Denis Boyer <jdboyer@media5corp.com> Cc: Andreas Schmitz <Andreas.Schmitz@riedel.net> Cc: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ucc_geth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 05a95586f3c5..055b87ab4f07 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -899,7 +899,8 @@ struct ucc_geth_hardware_statistics { #define UCC_GETH_UTFS_INIT 512 /* Tx virtual FIFO size */ #define UCC_GETH_UTFET_INIT 256 /* 1/2 utfs */ -#define UCC_GETH_UTFTT_INIT 512 +#define UCC_GETH_UTFTT_INIT 256 /* 1/2 utfs + due to errata */ /* Gigabit Ethernet (1000 Mbps) */ #define UCC_GETH_URFS_GIGA_INIT 4096/*2048*/ /* Rx virtual FIFO size */ From 5a92bc88cef279261d3f138e25850c122df67045 Mon Sep 17 00:00:00 2001 From: Chris Mason <chris.mason@oracle.com> Date: Mon, 29 Nov 2010 09:49:11 -0500 Subject: [PATCH 161/179] Btrfs: don't use migrate page without CONFIG_MIGRATION Fixes compile error Signed-off-by: Chris Mason <chris.mason@oracle.com> --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 57c9d8eeb7dc..33b6d459494c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -712,8 +712,11 @@ static int btree_migratepage(struct address_space *mapping, if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - +#ifdef CONFIG_MIGRATION return migrate_page(mapping, newpage, page); +#else + return -ENOSYS; +#endif } static int btree_writepage(struct page *page, struct writeback_control *wbc) @@ -821,7 +824,9 @@ static const struct address_space_operations btree_aops = { .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, +#ifdef CONFIG_MIGRATION .migratepage = btree_migratepage, +#endif }; int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, From a1dcfcb7f2d08717325157ed3c1db2362d6eb8c9 Mon Sep 17 00:00:00 2001 From: Toshiharu Okada <toshiharu-linux@dsn.okisemi.com> Date: Sun, 21 Nov 2010 19:58:37 +0000 Subject: [PATCH 162/179] pch_gbe dreiver: chang author This driver's AUTHOR was changed to "Toshiharu Okada" from "Masayuki Ohtake". I update the Kconfig, renamed "Topcliff" to "EG20T". Signed-off-by: Toshiharu Okada <toshiharu-linux@dsn.okisemi.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/Kconfig | 6 +++--- drivers/net/pch_gbe/pch_gbe_main.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f6668cdaac85..a445a0492c52 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2543,10 +2543,10 @@ config PCH_GBE depends on PCI select MII ---help--- - This is a gigabit ethernet driver for Topcliff PCH. - Topcliff PCH is the platform controller hub that is used in Intel's + This is a gigabit ethernet driver for EG20T PCH. + EG20T PCH is the platform controller hub that is used in Intel's general embedded platform. - Topcliff PCH has Gigabit Ethernet interface. + EG20T PCH has Gigabit Ethernet interface. Using this interface, it is able to access system devices connected to Gigabit Ethernet. This driver enables Gigabit Ethernet function. diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 472056b47440..03a1d280105f 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -1,6 +1,6 @@ /* * Copyright (C) 1999 - 2010 Intel Corporation. - * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD. + * Copyright (C) 2010 OKI SEMICONDUCTOR CO., LTD. * * This code was derived from the Intel e1000e Linux driver. * @@ -2464,8 +2464,8 @@ static void __exit pch_gbe_exit_module(void) module_init(pch_gbe_init_module); module_exit(pch_gbe_exit_module); -MODULE_DESCRIPTION("OKI semiconductor PCH Gigabit ethernet Driver"); -MODULE_AUTHOR("OKI semiconductor, <masa-korg@dsn.okisemi.com>"); +MODULE_DESCRIPTION("EG20T PCH Gigabit ethernet Driver"); +MODULE_AUTHOR("OKI SEMICONDUCTOR, <toshiharu-linux@dsn.okisemi.com>"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, pch_gbe_pcidev_id); From 50a4205333c5e545551f1f82b3004ca635407c5c Mon Sep 17 00:00:00 2001 From: Toshiharu Okada <toshiharu-linux@dsn.okisemi.com> Date: Mon, 29 Nov 2010 06:18:07 +0000 Subject: [PATCH 163/179] pch_gbe driver: The wrong of initializer entry The wrong of initializer entry was modified. Signed-off-by: Toshiharu Okada <toshiharu-linux@dsn.okisemi.com> Reported-by: Dr. David Alan Gilbert <linux@treblig.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/pch_gbe/pch_gbe_param.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/pch_gbe/pch_gbe_param.c b/drivers/net/pch_gbe/pch_gbe_param.c index 2510146fc560..ef0996a0eaaa 100644 --- a/drivers/net/pch_gbe/pch_gbe_param.c +++ b/drivers/net/pch_gbe/pch_gbe_param.c @@ -434,8 +434,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) .err = "using default of " __MODULE_STRING(PCH_GBE_DEFAULT_TXD), .def = PCH_GBE_DEFAULT_TXD, - .arg = { .r = { .min = PCH_GBE_MIN_TXD } }, - .arg = { .r = { .max = PCH_GBE_MAX_TXD } } + .arg = { .r = { .min = PCH_GBE_MIN_TXD, + .max = PCH_GBE_MAX_TXD } } }; struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; tx_ring->count = TxDescriptors; @@ -450,8 +450,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) .err = "using default of " __MODULE_STRING(PCH_GBE_DEFAULT_RXD), .def = PCH_GBE_DEFAULT_RXD, - .arg = { .r = { .min = PCH_GBE_MIN_RXD } }, - .arg = { .r = { .max = PCH_GBE_MAX_RXD } } + .arg = { .r = { .min = PCH_GBE_MIN_RXD, + .max = PCH_GBE_MAX_RXD } } }; struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; rx_ring->count = RxDescriptors; From 25888e30319f8896fc656fc68643e6a078263060 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <eric.dumazet@gmail.com> Date: Thu, 25 Nov 2010 04:11:39 +0000 Subject: [PATCH 164/179] af_unix: limit recursion level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Its easy to eat all kernel memory and trigger NMI watchdog, using an exploit program that queues unix sockets on top of others. lkml ref : http://lkml.org/lkml/2010/11/25/8 This mechanism is used in applications, one choice we have is to have a recursion limit. Other limits might be needed as well (if we queue other types of files), since the passfd mechanism is currently limited by socket receive queue sizes only. Add a recursion_level to unix socket, allowing up to 4 levels. Each time we send an unix socket through sendfd mechanism, we copy its recursion level (plus one) to receiver. This recursion level is cleared when socket receive queue is emptied. Reported-by: Марк Коренберг <socketpair@gmail.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/af_unix.h | 2 ++ net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++----- net/unix/garbage.c | 2 +- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 90c9e2872f27..18e5c3f67580 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); +extern struct sock *unix_get_socket(struct file *filp); #define UNIX_HASH_SIZE 256 @@ -56,6 +57,7 @@ struct unix_sock { spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; + unsigned char recursion_level; struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3c95304a0817..2268e6798124 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +#define MAX_RECURSION_LEVEL 4 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + unsigned char max_level = 0; + int unix_sock_count = 0; + + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + + if (sk) { + unix_sock_count++; + max_level = max(max_level, + unix_sk(sk)->recursion_level); + } + } + if (unlikely(max_level > MAX_RECURSION_LEVEL)) + return -ETOOMANYREFS; /* * Need to duplicate file references for the sake of garbage @@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count-1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - return 0; + if (unix_sock_count) { + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); + } + return max_level; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; err = unix_scm_to_skb(siocb->scm, skb, true); - if (err) + if (err < 0) goto out_free; + max_level = err + 1; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1514,6 +1534,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); @@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, int sent = 0; struct scm_cookie tmp_scm; bool fds_sent = false; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); - if (err) { + if (err < 0) { kfree_skb(skb); goto out_err; } + max_level = err + 1; fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); @@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, size); sent += size; @@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { + unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 40df93d1cf35..f89f83bf828e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; From 3f0d3d016d89a5efb8b926d4707eb21fa13f3d27 Mon Sep 17 00:00:00 2001 From: Matthew Garrett <mjg@redhat.com> Date: Thu, 21 Oct 2010 17:42:40 -0400 Subject: [PATCH 165/179] tpm: Autodetect itpm devices Some Lenovos have TPMs that require a quirk to function correctly. This can be autodetected by checking whether the device has a _HID of INTC0102. This is an invalid PNPid, and as such is discarded by the pnp layer - however it's still present in the ACPI code, so we can pull it out that way. This means that the quirk won't be automatically applied on non-ACPI systems, but without ACPI we don't have any way to identify the chip anyway so I don't think that's a great concern. Signed-off-by: Matthew Garrett <mjg@redhat.com> Acked-by: Rajiv Andrade <srajiv@linux.vnet.ibm.com> Tested-by: Jiri Kosina <jkosina@suse.cz> Tested-by: Andy Isaacson <adi@hexapodia.org> Signed-off-by: James Morris <jmorris@namei.org> --- drivers/char/tpm/tpm_tis.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 1030f8420137..c17a305ecb28 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/wait.h> +#include <linux/acpi.h> #include "tpm.h" #define TPM_HEADER_SIZE 10 @@ -78,6 +79,26 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); +#ifdef CONFIG_ACPI +static int is_itpm(struct pnp_dev *dev) +{ + struct acpi_device *acpi = pnp_acpi_device(dev); + struct acpi_hardware_id *id; + + list_for_each_entry(id, &acpi->pnp.ids, list) { + if (!strcmp("INTC0102", id->id)) + return 1; + } + + return 0; +} +#else +static int is_itpm(struct pnp_dev *dev) +{ + return 0; +} +#endif + static int check_locality(struct tpm_chip *chip, int l) { if ((ioread8(chip->vendor.iobase + TPM_ACCESS(l)) & @@ -472,6 +493,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (is_itpm(to_pnp_dev(dev))) + itpm = 1; + if (itpm) dev_info(dev, "Intel iTPM workaround enabled\n"); From bcb38ceb225f5d5b2198a2755277cd441ed1e82b Mon Sep 17 00:00:00 2001 From: Dave Airlie <airlied@redhat.com> Date: Tue, 30 Nov 2010 09:15:46 +1000 Subject: [PATCH 166/179] Revert "debug_locks: set oops_in_progress if we will log messages." This reverts commit e0fdace10e75dac67d906213b780ff1b1a4cc360. On-list discussion seems to suggest that the robustness fixes for printk make this unnecessary and DaveM has also agreed in person at Kernel Summit and on list. The main problem with this code is once we hit a lockdep splat we always keep oops_in_progress set, the console layer uses oops_in_progress with KMS to decide when it should be showing the oops and not showing X, so it causes problems around suspend/resume time when a userspace resume can cause a console switch away from X, only if oops_in_progress is set (which is what we want if an oops actually is in progress, but not because we had a lockdep splat 2 days prior). Cc: David S Miller <davem@davemloft.net> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Dave Airlie <airlied@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- lib/debug_locks.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 5bf0020b9248..b1c177307677 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -8,7 +8,6 @@ * * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ -#include <linux/kernel.h> #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/module.h> @@ -39,7 +38,6 @@ int debug_locks_off(void) { if (__debug_locks_off()) { if (!debug_locks_silent) { - oops_in_progress = 1; console_verbose(); return 1; } From f2e785ed5fb8e5fe5063ee2ba1c8f150396c53c6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Fri, 26 Nov 2010 15:38:45 +0100 Subject: [PATCH 167/179] powerpc: Use call_rcu_sched() for pagetables PowerPC relies on IRQ-disable to guard against RCU quiecent states, use the appropriate RCU call version. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> --- arch/powerpc/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 2c7e801ab20b..6a3997f98dfb 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -92,7 +92,7 @@ static void pte_free_rcu_callback(struct rcu_head *head) static void pte_free_submit(struct pte_freelist_batch *batch) { - call_rcu(&batch->rcu, pte_free_rcu_callback); + call_rcu_sched(&batch->rcu, pte_free_rcu_callback); } void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift) From e8a7e48bb248a1196484d3f8afa53bded2b24e71 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Mon, 29 Nov 2010 20:42:04 -0800 Subject: [PATCH 168/179] Linux 2.6.37-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b31d21377e4c..9e3c89030f5c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 37 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* From ac88e07122fc0eb5cbad403be97ef02c317a06b7 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 24 Nov 2010 16:51:17 +0000 Subject: [PATCH 169/179] ARM: hw_breakpoint: ensure OS lock is clear before writing to debug registers ARMv7 architects a system for saving and restoring the debug registers across low-power modes. At the heart of this system is a lock register which, when set, forbids writes to the debug registers. While locked, writes to debug registers via the co-processor interface will result in undefined instruction traps. Linux currently doesn't make use of this feature because we update the debug registers on context switch anyway, however the status of the lock is IMPLEMENTATION DEFINED on reset. This patch ensures that the lock is cleared during boot so that we can write to the debug registers safely. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 21e3a4ab3b8c..793959ec8982 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -768,6 +768,23 @@ static void __init reset_ctrl_regs(void *unused) { int i; + /* + * v7 debug contains save and restore registers so that debug state + * can be maintained across low-power modes without leaving + * the debug logic powered up. It is IMPLEMENTATION DEFINED whether + * we can write to the debug registers out of reset, so we must + * unlock the OS Lock Access Register to avoid taking undefined + * instruction exceptions later on. + */ + if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { + /* + * Unconditionally clear the lock by writing a value + * other than 0xC5ACCE55 to the access register. + */ + asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + isb(); + } + if (enable_monitor_mode()) return; @@ -810,17 +827,17 @@ static int __init arch_hw_breakpoint_init(void) pr_warning("halting debug mode enabled. Assuming maximum " "watchpoint size of 4 bytes."); } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); - /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ smp_call_function(reset_ctrl_regs, NULL, 1); reset_ctrl_regs(NULL); + + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); } /* Register debug fault handler. */ From 7d99331e4793b52d488e911876ef11d843c6c8c9 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 24 Nov 2010 17:45:49 +0000 Subject: [PATCH 170/179] ARM: hw_breakpoint: reset control registers in hotplug path The ARMv7 debug architecture doesn't make any guarantees about the contents of debug control registers following a debug logic reset. This patch ensures that we reset the control registers when a cpu comes ONLINE (for example, with hotplug) so that when we enable monitor mode while inserting a breakpoint we won't exhibit random behaviour. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 793959ec8982..515a3c44c118 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -764,7 +764,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, /* * One-time initialisation. */ -static void __init reset_ctrl_regs(void *unused) +static void reset_ctrl_regs(void *unused) { int i; @@ -799,6 +799,18 @@ static void __init reset_ctrl_regs(void *unused) } } +static int __cpuinit dbg_reset_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + if (action == CPU_ONLINE) + smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata dbg_reset_nb = { + .notifier_call = dbg_reset_notify, +}; + static int __init arch_hw_breakpoint_init(void) { int ret = 0; @@ -846,6 +858,8 @@ static int __init arch_hw_breakpoint_init(void) hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, "breakpoint debug exception"); + /* Register hotplug notifier. */ + register_cpu_notifier(&dbg_reset_nb); out: return ret; } From 6ee33c2712fcdff2568d9bbadb25c8e5a7c36212 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Thu, 25 Nov 2010 12:01:54 +0000 Subject: [PATCH 171/179] ARM: hw_breakpoint: correct and simplify alignment fixup code The current hw_breakpoint code tries to fix up the alignment of breakpoints so that we can make use of sparse byte-address-select bits in the control register and give the illusion that we can set breakpoints on unaligned addresses. Although this works on v6 cores, v7 forbids this behaviour, instead requiring breakpoints to be set on aligned addresses and have contiguous byte-address-select ranges depending on the instruction set in use. For ARM the only supported size is 4 bytes, whilst Thumb-2 also permits 2 byte breakpoints (watchpoints can be of 1, 2, 4 or 8 bytes long). This patch simplifies the alignment fixup code so that we require addresses to be aligned to the size of the corresponding breakpoint. This allows us to handle the common case of breaking on a half-word aligned Thumb-2 instruction and also allows us to set byte watchpoints on arbitrary addresses. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 57 ++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 515a3c44c118..d37ed3501e57 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -537,6 +537,17 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } + /* + * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes. + * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported + * by the hardware and must be aligned to the appropriate number of + * bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && + info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + /* Address */ info->address = bp->attr.bp_addr; @@ -561,7 +572,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; - u32 bytelen, max_len, offset, alignment_mask = 0x3; + u32 offset, alignment_mask = 0x3; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); @@ -571,32 +582,27 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Check address alignment. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - if (info->address & alignment_mask) { - /* - * Try to fix the alignment. This may result in a length - * that is too large, so we must check for that. - */ - bytelen = get_hbp_len(info->ctrl.len); - max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : - max_watchpoint_len; - - if (max_len >= 8) - offset = info->address & 0x7; - else - offset = info->address & 0x3; - - if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { - ret = -EFBIG; - goto out; - } - - info->ctrl.len <<= offset; - info->address &= ~offset; - - pr_debug("breakpoint alignment fixup: length = 0x%x, " - "address = 0x%x\n", info->ctrl.len, info->address); + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + case 2: + /* Allow halfword watchpoints and breakpoints. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + default: + ret = -EINVAL; + goto out; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. @@ -607,7 +613,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), "overflow handler required but none found")) { ret = -EINVAL; - goto out; } out: return ret; From 7e20269647169e7ea08a62bdc4979a3ba32e615c Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Sun, 28 Nov 2010 14:57:24 +0000 Subject: [PATCH 172/179] ARM: hw_breakpoint: disable preemption during debug exception handling On ARM, debug exceptions occur in the form of data or prefetch aborts. One difference is that debug exceptions require access to per-cpu banked registers and data structures which are not saved in the low-level exception code. For kernels built with CONFIG_PREEMPT, there is an unlikely scenario that the debug handler ends up running on a different CPU from the one that originally signalled the event, resulting in random data being read from the wrong registers. This patch adds a debug_entry macro to the low-level exception handling code which checks whether the taken exception is a debug exception. If it is, the preempt count for the faulting process is incremented. After the debug handler has finished, the count is decremented. Acked-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/entry-armv.S | 4 ++++ arch/arm/kernel/entry-header.S | 19 +++++++++++++++++++ arch/arm/kernel/hw_breakpoint.c | 18 +++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5de..34bbef0d2e7e 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -198,6 +198,7 @@ __dabt_svc: @ @ set desired IRQ state, then call main handler @ + debug_entry r1 msr cpsr_c, r9 mov r2, sp bl do_DataAbort @@ -324,6 +325,7 @@ __pabt_svc: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 msr cpsr_c, r9 @ Maybe enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -439,6 +441,7 @@ __dabt_usr: @ @ IRQs on, then call the main handler @ + debug_entry r1 enable_irq mov r2, sp adr lr, BSYM(ret_from_exception) @@ -703,6 +706,7 @@ __pabt_usr: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 enable_irq @ Enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d93f976fb389..ae9464900168 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -165,6 +165,25 @@ .endm #endif /* !CONFIG_THUMB2_KERNEL */ + @ + @ Debug exceptions are taken as prefetch or data aborts. + @ We must disable preemption during the handler so that + @ we can access the debug registers safely. + @ + .macro debug_entry, fsr +#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT) + ldr r4, =0x40f @ mask out fsr.fs + and r5, r4, \fsr + cmp r5, #2 @ debug exception + bne 1f + get_thread_info r10 + ldr r6, [r10, #TI_PREEMPT] @ get preempt count + add r11, r6, #1 @ increment it + str r11, [r10, #TI_PREEMPT] +1: +#endif + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index d37ed3501e57..eeba38008032 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt #include <linux/errno.h> +#include <linux/hardirq.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> #include <linux/smp.h> @@ -736,14 +737,17 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint]. + * Prefetch Abort Handler [breakpoint] with preemption disabled. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int ret = 1; /* Unhandled fault. */ + int ret = 0; u32 dscr; + /* We must be called with preemption disabled. */ + WARN_ON(preemptible()); + /* We only handle watchpoints and hardware breakpoints. */ ARM_DBG_READ(c1, 0, dscr); @@ -758,11 +762,15 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, watchpoint_handler(addr, regs); break; default: - goto out; + ret = 1; /* Unhandled fault. */ } - ret = 0; -out: + /* + * Re-enable preemption after it was disabled in the + * low-level exception handling code. + */ + preempt_enable(); + return ret; } From 0017ff42ac37ff6aeb87d0b006c5d32b9a39f5fc Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Sun, 28 Nov 2010 15:09:36 +0000 Subject: [PATCH 173/179] ARM: hw_breakpoint: don't advertise reserved breakpoints To permit handling of watchpoint exceptions without signalling a debugger, it is necessary to reserve breakpoint registers for in-kernel use only. This patch ensures that we record and subtract the number of reserved breakpoints from the number of usable breakpoint registers that we advertise to userspace via the ptrace API. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 206 ++++++++++++++++++-------------- 1 file changed, 117 insertions(+), 89 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eeba38008032..b16c4568cb01 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -45,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); /* Number of BRP/WRP registers on this CPU. */ static int core_num_brps; +static int core_num_reserved_brps; static int core_num_wrps; /* Debug architecture version. */ @@ -53,87 +54,6 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -/* Determine number of BRP registers available. */ -static int get_num_brps(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 24) & 0xf) + 1; -} - -/* Determine number of WRP registers available. */ -static int get_num_wrps(void) -{ - /* - * FIXME: When a watchpoint fires, the only way to work out which - * watchpoint it was is by disassembling the faulting instruction - * and working out the address of the memory access. - * - * Furthermore, we can only do this if the watchpoint was precise - * since imprecise watchpoints prevent us from calculating register - * based addresses. - * - * For the time being, we only report 1 watchpoint register so we - * always know which watchpoint fired. In the future we can either - * add a disassembler and address generation emulator, or we can - * insert a check to see if the DFAR is set on watchpoint exception - * entry [the ARM ARM states that the DFAR is UNKNOWN, but - * experience shows that it is set on some implementations]. - */ - -#if 0 - u32 didr, wrps; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 28) & 0xf) + 1; -#endif - - return 1; -} - -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - -/* Determine debug architecture. */ -static u8 get_debug_arch(void) -{ - u32 didr; - - /* Do we implement the extended CPUID interface? */ - if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); - return ARM_DEBUG_ARCH_V6; - } - - ARM_DBG_READ(c0, 0, didr); - return (didr >> 16) & 0xf; -} - -/* Does this core support mismatch breakpoints? */ -static int core_has_mismatch_bps(void) -{ - return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; -} - -u8 arch_get_debug_arch(void) -{ - return debug_arch; -} - #define READ_WB_REG_CASE(OP2, M, VAL) \ case ((OP2 << 4) + M): \ ARM_DBG_READ(c ## M, OP2, VAL); \ @@ -211,6 +131,111 @@ static void write_wb_reg(int n, u32 val) isb(); } +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warning("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, 0, didr); + return (didr >> 16) & 0xf; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + +/* Determine number of BRP register available. */ +static int get_num_brp_resources(void) +{ + u32 didr; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_brps(void) +{ + return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 && + get_num_brp_resources() > 1); +} + +/* Determine number of usable WRPs available. */ +static int get_num_wrps(void) +{ + /* + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * Providing we have more than 1 breakpoint register, we only report + * a single watchpoint register for the time being. This way, we always + * know which watchpoint fired. In the future we can either add a + * disassembler and address generation emulator, or we can insert a + * check to see if the DFAR is set on watchpoint exception entry + * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows + * that it is set on some implementations]. + */ + +#if 0 + int wrps; + u32 didr; + ARM_DBG_READ(c0, 0, didr); + wrps = ((didr >> 28) & 0xf) + 1; +#endif + int wrps = 1; + + if (core_has_mismatch_brps() && wrps >= get_num_brp_resources()) + wrps = get_num_brp_resources() - 1; + + return wrps; +} + +/* We reserve one breakpoint for each watchpoint. */ +static int get_num_reserved_brps(void) +{ + if (core_has_mismatch_brps()) + return get_num_wrps(); + return 0; +} + +/* Determine number of usable BRPs available. */ +static int get_num_brps(void) +{ + int brps = get_num_brp_resources(); + if (core_has_mismatch_brps()) + brps -= get_num_reserved_brps(); + return brps; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -326,7 +351,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { info->ctrl.mismatch = 1; @@ -377,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { i = max_slots; @@ -611,7 +636,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * we can use the mismatch feature as a poor-man's hardware single-step. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), "overflow handler required but none found")) { ret = -EINVAL; } @@ -698,7 +723,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps; ++i) { + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -801,7 +826,8 @@ static void reset_ctrl_regs(void *unused) if (enable_monitor_mode()) return; - for (i = 0; i < core_num_brps; ++i) { + /* We must also reset any reserved registers. */ + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -839,13 +865,15 @@ static int __init arch_hw_breakpoint_init(void) /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); + core_num_reserved_brps = get_num_reserved_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", - core_num_brps, core_num_wrps); + core_num_brps + core_num_reserved_brps, core_num_wrps); - if (core_has_mismatch_bps()) - pr_info("1 breakpoint reserved for watchpoint single-step.\n"); + if (core_num_reserved_brps) + pr_info("%d breakpoint(s) reserved for watchpoint " + "single-step.\n", core_num_reserved_brps); ARM_DBG_READ(c1, 0, dscr); if (dscr & ARM_DSCR_HDBGEN) { From 93a04a3416da12647c47840ebe2bb812fcb801d0 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Mon, 29 Nov 2010 16:56:01 +0000 Subject: [PATCH 174/179] ARM: hw_breakpoint: do not allocate new breakpoints with preemption disabled The watchpoint single-stepping code calls register_user_hw_breakpoint to register a mismatch breakpoint for stepping over the watchpoint. This is performed with preemption disabled, which is unsafe as we may end up scheduling whilst in_atomic(). Furthermore, using the perf API is rather overkill since we are already in the hw-breakpoint backend and only require access to reserved breakpoints anyway. This patch reworks the watchpoint stepping code so that we don't require another perf_event for the mismatch breakpoint. Instead, we hold a separate arch_hw_breakpoint_ctrl struct inside the watchpoint which is used exclusively for stepping. We can check whether or not stepping is enabled when installing or uninstalling the watchpoint and operate on the breakpoint accordingly. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/include/asm/hw_breakpoint.h | 2 +- arch/arm/kernel/hw_breakpoint.c | 134 +++++++++++++++------------ 2 files changed, 78 insertions(+), 58 deletions(-) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 4d8ae9d67abe..881429d0b849 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint_ctrl { struct arch_hw_breakpoint { u32 address; u32 trigger; - struct perf_event *suspended_wp; + struct arch_hw_breakpoint_ctrl step_ctrl; struct arch_hw_breakpoint_ctrl ctrl; }; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b16c4568cb01..81b63e94dfe0 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -315,23 +315,6 @@ u8 arch_get_max_wp_len(void) return max_watchpoint_len; } -/* - * Handler for reactivating a suspended watchpoint when the single - * step `mismatch' breakpoint is triggered. - */ -static void wp_single_step_handler(struct perf_event *bp, int unused, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - perf_event_enable(counter_arch_bp(bp)->suspended_wp); - unregister_hw_breakpoint(bp); -} - -static int bp_is_single_step(struct perf_event *bp) -{ - return bp->overflow_handler == wp_single_step_handler; -} - /* * Install a perf counter breakpoint. */ @@ -340,29 +323,35 @@ int arch_install_hw_breakpoint(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; int i, max_slots, ctrl_base, val_base, ret = 0; + u32 addr, ctrl; /* Ensure that we are in monitor mode and halting mode is disabled. */ ret = enable_monitor_mode(); if (ret) goto out; + addr = info->address; + ctrl = encode_ctrl_reg(info->ctrl) | 0x1; + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - info->ctrl.mismatch = 1; - i = max_slots; - slots[i] = bp; - goto setup; - } } else { /* Watchpoint */ - ctrl_base = ARM_BASE_WCR; - val_base = ARM_BASE_WVR; + if (info->step_ctrl.enabled) { + /* Install into the reserved breakpoint region. */ + ctrl_base = ARM_BASE_BCR + core_num_brps; + val_base = ARM_BASE_BVR + core_num_brps; + /* Override the watchpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } else { + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + } slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -381,12 +370,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) goto out; } -setup: /* Setup the address register. */ - write_wb_reg(val_base + i, info->address); + write_wb_reg(val_base + i, addr); /* Setup the control register. */ - write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); + write_wb_reg(ctrl_base + i, ctrl); out: return ret; @@ -403,15 +391,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - i = max_slots; - slots[i] = NULL; - goto reset; - } } else { /* Watchpoint */ - base = ARM_BASE_WCR; + if (info->step_ctrl.enabled) + base = ARM_BASE_BCR + core_num_brps; + else + base = ARM_BASE_WCR; slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -429,7 +414,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) return; -reset: /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -579,7 +563,7 @@ static int arch_build_bp_info(struct perf_event *bp) /* Privilege */ info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) + if (arch_check_bp_in_kernelspace(bp)) info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ @@ -664,22 +648,18 @@ static void update_mismatch_flag(int idx, int flag) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); struct arch_hw_breakpoint *info; - struct perf_event_attr attr; /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); - hw_breakpoint_init(&attr); - attr.bp_addr = regs->ARM_pc & ~0x3; - attr.bp_len = HW_BREAKPOINT_LEN_4; - attr.bp_type = HW_BREAKPOINT_X; - for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); - if (slots[i] == NULL) { + wp = slots[i]; + + if (wp == NULL) { rcu_read_unlock(); continue; } @@ -689,28 +669,64 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * single watchpoint, we can set the trigger to the lowest * possible faulting address. */ - info = counter_arch_bp(slots[i]); - info->trigger = slots[i]->attr.bp_addr; + info = counter_arch_bp(wp); + info->trigger = wp->attr.bp_addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); - perf_bp_event(slots[i], regs); + perf_bp_event(wp, regs); /* * If no overflow handler is present, insert a temporary * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!slots[i]->overflow_handler) { - bp = register_user_hw_breakpoint(&attr, - wp_single_step_handler, - current); - counter_arch_bp(bp)->suspended_wp = slots[i]; - perf_event_disable(slots[i]); + if (!wp->overflow_handler) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = regs->ARM_pc; + arch_install_hw_breakpoint(wp); } rcu_read_unlock(); } } +static void watchpoint_single_step_handler(unsigned long pc) +{ + int i; + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct arch_hw_breakpoint *info; + + for (i = 0; i < core_num_reserved_brps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + if (!info->step_ctrl.enabled) + goto unlock; + + /* + * Restore the original watchpoint if we've completed the + * single-step. + */ + if (info->trigger != pc) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(wp); + } + +unlock: + rcu_read_unlock(); + } +} + static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; @@ -723,7 +739,8 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { + /* Check the currently installed breakpoints first. */ + for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -750,7 +767,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) } unlock: - if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { + if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); } @@ -758,6 +775,9 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) update_mismatch_flag(i, mismatch); rcu_read_unlock(); } + + /* Handle any pending watchpoint single-step breakpoints. */ + watchpoint_single_step_handler(addr); } /* From 9ebb3cbcc39d4e61ae6751167086acfb5c201e6f Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 1 Dec 2010 14:12:13 +0000 Subject: [PATCH 175/179] ARM: hw_breakpoint: unify single-stepping code for watchpoints and breakpoints The single-stepping code is currently different depending on whether we are stepping over a breakpoint or a watchpoint. There is no good reason for this, so let's sort it out. This patch adds functions for enabling/disabling single-step for a particular hw_breakpoint and integrates this with the exception handling code. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/include/asm/hw_breakpoint.h | 4 +- arch/arm/kernel/hw_breakpoint.c | 81 ++++++++++++++-------------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 881429d0b849..f389b2704d82 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -20,8 +20,8 @@ struct arch_hw_breakpoint_ctrl { struct arch_hw_breakpoint { u32 address; u32 trigger; - struct arch_hw_breakpoint_ctrl step_ctrl; - struct arch_hw_breakpoint_ctrl ctrl; + struct arch_hw_breakpoint_ctrl step_ctrl; + struct arch_hw_breakpoint_ctrl ctrl; }; static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 81b63e94dfe0..36cd7680d3d2 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -339,6 +339,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; + if (info->step_ctrl.enabled) { + /* Override the breakpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } } else { /* Watchpoint */ if (info->step_ctrl.enabled) { @@ -628,21 +633,28 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) return ret; } -static void update_mismatch_flag(int idx, int flag) +/* + * Enable/disable single-stepping over the breakpoint bp at address addr. + */ +static void enable_single_step(struct perf_event *bp, u32 addr) { - struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); - struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - if (bp == NULL) - return; + arch_uninstall_hw_breakpoint(bp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = addr; + arch_install_hw_breakpoint(bp); +} - info = counter_arch_bp(bp); - - /* Update the mismatch field to enter/exit `single-step' mode */ - if (!bp->overflow_handler && info->ctrl.mismatch != flag) { - info->ctrl.mismatch = flag; - write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); - } +static void disable_single_step(struct perf_event *bp) +{ + arch_uninstall_hw_breakpoint(bp); + counter_arch_bp(bp)->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(bp); } static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) @@ -679,16 +691,8 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.mismatch = 1; - info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; - info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; - info->step_ctrl.privilege = info->ctrl.privilege; - info->step_ctrl.enabled = 1; - info->trigger = regs->ARM_pc; - arch_install_hw_breakpoint(wp); - } + if (!wp->overflow_handler) + enable_single_step(wp, instruction_pointer(regs)); rcu_read_unlock(); } @@ -716,11 +720,8 @@ static void watchpoint_single_step_handler(unsigned long pc) * Restore the original watchpoint if we've completed the * single-step. */ - if (info->trigger != pc) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.enabled = 0; - arch_install_hw_breakpoint(wp); - } + if (info->trigger != pc) + disable_single_step(wp); unlock: rcu_read_unlock(); @@ -730,7 +731,6 @@ static void watchpoint_single_step_handler(unsigned long pc) static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - int mismatch; u32 ctrl_reg, val, addr; struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); struct arch_hw_breakpoint *info; @@ -745,34 +745,33 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) bp = slots[i]; - if (bp == NULL) { - rcu_read_unlock(); - continue; - } + if (bp == NULL) + goto unlock; - mismatch = 0; + info = counter_arch_bp(bp); /* Check if the breakpoint value matches. */ val = read_wb_reg(ARM_BASE_BVR + i); if (val != (addr & ~0x3)) - goto unlock; + goto mismatch; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); decode_ctrl_reg(ctrl_reg, &ctrl); if ((1 << (addr & 0x3)) & ctrl.len) { - mismatch = 1; - info = counter_arch_bp(bp); info->trigger = addr; - } - -unlock: - if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); + if (!bp->overflow_handler) + enable_single_step(bp, addr); + goto unlock; } - update_mismatch_flag(i, mismatch); +mismatch: + /* If we're stepping a breakpoint, it can now be restored. */ + if (info->step_ctrl.enabled) + disable_single_step(bp); +unlock: rcu_read_unlock(); } From 3ce70b2e24cd35cc9f2df8cf5205b8ab4e6178e1 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 1 Dec 2010 17:05:24 +0000 Subject: [PATCH 176/179] ARM: hw_breakpoint: disallow per-cpu breakpoints without overflow handler Single-stepping a breakpoint requires us to disable it temporarily so that we don't get stuck in a recursive debug trap. With per-cpu breakpoints this presents a problem where an interrupt can be taken before the single-step has completed and a new task is eventually scheduled. This new task will not hit the breakpoint because it will have been disabled during the previous handling code. This patch disallows per-cpu breakpoints on ARM when an overflow handler is not present. A similar effect can be created by placing breakpoints on a shell and then running applications there. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 36cd7680d3d2..eef1b1e235a7 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -622,10 +622,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware single-step. + * we can use the mismatch feature as a poor-man's hardware + * single-step, but this only works for per-task breakpoints. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps() + || !bp->hw.bp_target), "overflow handler required but none found")) { ret = -EINVAL; } From ce9b1b09520789223f72a9fefd5f0e329f8d89d0 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Thu, 25 Nov 2010 12:59:31 +0000 Subject: [PATCH 177/179] ARM: ptrace: fix style issue with hw_breakpoint interface This patch fixes a trivial style issue in ptrace.c. Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 3e97483abcf0..19c6816db61e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num, goto out; if ((gen_type & implied_type) != gen_type) { - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } attr.bp_len = gen_len; From 4a55c18e2023096c8684fae5fa1cfa96a03172ff Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Mon, 29 Nov 2010 17:06:53 +0000 Subject: [PATCH 178/179] ARM: hw_breakpoint: fix warnings generated by sparse sparse doesn't like per-cpu accesses such as: static DEFINE_PER_CPU(struct perf_event *, foo[MAXLEN]); struct perf_event **bar = __get_cpu_var(foo); and shouts quite loudly about it: | warning: incorrect type in assignment (different modifiers) | expected struct perf_event **slots | got struct perf_event *[noderef] *<noident> This patch adds casts to these sorts of assignments in hw_breakpoint.c in order to silence the warnings. Reported-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eef1b1e235a7..56ed9a62013b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -337,7 +337,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; if (info->step_ctrl.enabled) { /* Override the breakpoint data with the step data. */ @@ -357,7 +357,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_WCR; val_base = ARM_BASE_WVR; } - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -394,7 +394,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ @@ -402,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR + core_num_brps; else base = ARM_BASE_WCR; - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -662,9 +662,11 @@ static void disable_single_step(struct perf_event *bp) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); @@ -703,9 +705,11 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) static void watchpoint_single_step_handler(unsigned long pc) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + for (i = 0; i < core_num_reserved_brps; ++i) { rcu_read_lock(); @@ -734,10 +738,12 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; u32 ctrl_reg, val, addr; - struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); + struct perf_event *bp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; From 8fbf397c3389c1dedfa9ee412715046ab28fd82d Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 1 Dec 2010 17:37:45 +0000 Subject: [PATCH 179/179] ARM: hw_breakpoint: do not fail initcall if monitor mode is disabled The debug registers can only be manipulated from software if monitor debug mode is enabled. On some cores, this can never be enabled (i.e. the corresponding bit in the DSCR is RAZ/WI). This patch ensures we can handle this hardware configuration and fail gracefully, rather than blow up the kernel during boot. Reported-by: Cyril Chemparathy <cyril@ti.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm/kernel/hw_breakpoint.c | 54 +++++++++++++++------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 56ed9a62013b..c9f3f0467570 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -219,23 +219,6 @@ static int get_num_brps(void) return brps; } -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -256,8 +239,12 @@ static int enable_monitor_mode(void) goto out; } + /* If monitor mode is already enabled, just return. */ + if (dscr & ARM_DSCR_MDBGEN) + goto out; + /* Write to the corresponding DSCR. */ - switch (debug_arch) { + switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); @@ -272,15 +259,30 @@ static int enable_monitor_mode(void) /* Check that the write made it through. */ ARM_DBG_READ(c1, 0, dscr); - if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), - "failed to enable monitor mode.")) { + if (!(dscr & ARM_DSCR_MDBGEN)) ret = -EPERM; - } out: return ret; } +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * Check if 8-bit byte-address select is available. * This clobbers WRP 0. @@ -294,9 +296,6 @@ static u8 get_max_wp_len(void) if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) goto out; - if (enable_monitor_mode()) - goto out; - memset(&ctrl, 0, sizeof(ctrl)); ctrl.len = ARM_BREAKPOINT_LEN_8; ctrl_reg = encode_ctrl_reg(ctrl); @@ -879,15 +878,13 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { - int ret = 0; u32 dscr; debug_arch = get_debug_arch(); if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { pr_info("debug architecture 0x%x unsupported.\n", debug_arch); - ret = -ENODEV; - goto out; + return 0; } /* Determine how many BRPs/WRPs are available. */ @@ -928,8 +925,7 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&dbg_reset_nb); -out: - return ret; + return 0; } arch_initcall(arch_hw_breakpoint_init);