From 43f7392ba9e2585bf34f21399b1ed78692b5d437 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 3 Jan 2009 23:56:27 +0100 Subject: [PATCH 01/49] intel-iommu: fix build error with INTR_REMAP=y and DMAR=n This fix should be safe since iommu->agaw is only used in intel-iommu.c. And this file is only compiled with DMAR=y. Signed-off-by: Joerg Roedel Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f5a662a50acb..2b4162d9ca30 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -491,7 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int map_size; u32 ver; static int iommu_allocated = 0; - int agaw; + int agaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -507,6 +507,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); +#ifdef CONFIG_DMAR agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR @@ -514,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id); goto error; } +#endif iommu->agaw = agaw; /* the registers might be more than one page */ From 704126ad81b8cb7d3d70adb9ecb143f4d3fb38af Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 4 Jan 2009 16:28:52 +0800 Subject: [PATCH 02/49] VT-d: handle Invalidation Queue Error to avoid system hang When hardware detects any error with a descriptor from the invalidation queue, it stops fetching new descriptors from the queue until software clears the Invalidation Queue Error bit in the Fault Status register. Following fix handles the IQE so the kernel won't be trapped in an infinite loop. 
Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 61 ++++++++++++++++++++++++++---------- drivers/pci/intr_remapping.c | 21 +++++++------ include/linux/intel-iommu.h | 3 +- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 2b4162d9ca30..8d3e9c261061 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -573,19 +573,49 @@ static inline void reclaim_free_desc(struct q_inval *qi) } } +static int qi_check_fault(struct intel_iommu *iommu, int index) +{ + u32 fault; + int head; + struct q_inval *qi = iommu->qi; + int wait_index = (index + 1) % QI_LENGTH; + + fault = readl(iommu->reg + DMAR_FSTS_REG); + + /* + * If IQE happens, the head points to the descriptor associated + * with the error. No new descriptors are fetched until the IQE + * is cleared. + */ + if (fault & DMA_FSTS_IQE) { + head = readl(iommu->reg + DMAR_IQH_REG); + if ((head >> 4) == index) { + memcpy(&qi->desc[index], &qi->desc[wait_index], + sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &qi->desc[index], + sizeof(struct qi_desc)); + writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); + return -EINVAL; + } + } + + return 0; +} + /* * Submit the queued invalidation descriptor to the remapping * hardware unit and wait for its completion. 
*/ -void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { + int rc = 0; struct q_inval *qi = iommu->qi; struct qi_desc *hw, wait_desc; int wait_index, index; unsigned long flags; if (!qi) - return; + return 0; hw = qi->desc; @@ -603,7 +633,8 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) hw[index] = *desc; - wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_STATUS_WRITE | QI_IWD_TYPE; wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); hw[wait_index] = wait_desc; @@ -614,13 +645,11 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; - spin_lock(&iommu->register_lock); /* * update the HW tail register indicating the presence of * new descriptors. */ writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); - spin_unlock(&iommu->register_lock); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -630,15 +659,21 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * a deadlock where the interrupt context can wait indefinitely * for free slots in the queue. 
*/ + rc = qi_check_fault(iommu, index); + if (rc) + goto out; + spin_unlock(&qi->q_lock); cpu_relax(); spin_lock(&qi->q_lock); } - - qi->desc_status[index] = QI_DONE; +out: + qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; reclaim_free_desc(qi); spin_unlock_irqrestore(&qi->q_lock, flags); + + return rc; } /* @@ -651,13 +686,13 @@ void qi_global_iec(struct intel_iommu *iommu) desc.low = QI_IEC_TYPE; desc.high = 0; + /* should never fail */ qi_submit_sync(&desc, iommu); } int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type, int non_present_entry_flush) { - struct qi_desc desc; if (non_present_entry_flush) { @@ -671,10 +706,7 @@ int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - qi_submit_sync(&desc, iommu); - - return 0; - + return qi_submit_sync(&desc, iommu); } int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, @@ -704,10 +736,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - qi_submit_sync(&desc, iommu); - - return 0; - + return qi_submit_sync(&desc, iommu); } /* diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b22529..45effc5726c0 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -207,7 +207,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) return index; } -static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) { struct qi_desc desc; @@ -215,7 +215,7 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) | QI_IEC_SELECTIVE; desc.high = 0; - qi_submit_sync(&desc, iommu); + return qi_submit_sync(&desc, iommu); } int map_irq_to_irte_handle(int irq, u16 *sub_handle) @@ -283,6 +283,7 @@ int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 
index) int modify_irte(int irq, struct irte *irte_modified) { + int rc; int index; struct irte *irte; struct intel_iommu *iommu; @@ -303,14 +304,15 @@ int modify_irte(int irq, struct irte *irte_modified) set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); __iommu_flush_cache(iommu, irte, sizeof(*irte)); - qi_flush_iec(iommu, index, 0); - + rc = qi_flush_iec(iommu, index, 0); spin_unlock(&irq_2_ir_lock); - return 0; + + return rc; } int flush_irte(int irq) { + int rc; int index; struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; @@ -326,10 +328,10 @@ int flush_irte(int irq) index = irq_iommu->irte_index + irq_iommu->sub_handle; - qi_flush_iec(iommu, index, irq_iommu->irte_mask); + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); spin_unlock(&irq_2_ir_lock); - return 0; + return rc; } struct intel_iommu *map_ioapic_to_ir(int apic) @@ -355,6 +357,7 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) int free_irte(int irq) { + int rc = 0; int index, i; struct irte *irte; struct intel_iommu *iommu; @@ -375,7 +378,7 @@ int free_irte(int irq) if (!irq_iommu->sub_handle) { for (i = 0; i < (1 << irq_iommu->irte_mask); i++) set_64bit((unsigned long *)irte, 0); - qi_flush_iec(iommu, index, irq_iommu->irte_mask); + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); } irq_iommu->iommu = NULL; @@ -385,7 +388,7 @@ int free_irte(int irq) spin_unlock(&irq_2_ir_lock); - return 0; + return rc; } static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..d2e3cbfba14f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -194,6 +194,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* FSTS_REG */ #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) +#define DMA_FSTS_IQE (1 << 4) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ @@ -328,7 +329,7 @@ extern int 
qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); +extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); From 1b0e235cc9bfae4bc0f5cd0cba929206fb0f6a64 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Feb 2009 00:54:07 -0800 Subject: [PATCH 03/49] sparc64: Fix crashes in jbusmc_print_dimm() Return was missing for the case where there is no dimm info match. Signed-off-by: David S. Miller --- arch/sparc/kernel/chmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index 3b9f4d6e14a9..e1a9598e2a4d 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -306,6 +306,7 @@ static int jbusmc_print_dimm(int syndrome_code, buf[1] = '?'; buf[2] = '?'; buf[3] = '\0'; + return 0; } p = dp->controller; prop = &p->layout; From 084eb960e81505680a9963665722d1bfd94af6a7 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Wed, 11 Feb 2009 13:24:19 -0800 Subject: [PATCH 04/49] intel-iommu: fix endless "Unknown DMAR structure type" loop I have a SuperMicro C2SBX motherboard with BIOS revision 1.0b. With vt-d enabled in the BIOS, Linux gets into an endless loop printing "DMAR:Unknown DMAR structure type" when booting. Here is the DMAR ACPI table: DMAR @ 0x7fe86dec 0000: 44 4d 41 52 98 00 00 00 01 6f 49 6e 74 65 6c 20 DMAR.....oIntel 0010: 4f 45 4d 44 4d 41 52 20 00 00 04 06 4c 4f 48 52 OEMDMAR ....LOHR 0020: 01 00 00 00 23 00 00 00 00 00 00 00 00 00 00 00 ....#........... 0030: 01 00 58 00 00 00 00 00 00 a0 e8 7f 00 00 00 00 ..X............. 0040: ff ff ef 7f 00 00 00 00 01 08 00 00 00 00 1d 00 ................ 
0050: 01 08 00 00 00 00 1d 01 01 08 00 00 00 00 1d 02 ................ 0060: 01 08 00 00 00 00 1d 07 01 08 00 00 00 00 1a 00 ................ 0070: 01 08 00 00 00 00 1a 01 01 08 00 00 00 00 1a 02 ................ 0080: 01 08 00 00 00 00 1a 07 01 08 00 00 00 00 1a 07 ................ 0090: c0 00 68 00 04 10 66 60 ..h...f` Here are the messages printed by the kernel: DMAR:Host address width 36 DMAR:RMRR base: 0x000000007fe8a000 end: 0x000000007fefffff DMAR:Unknown DMAR structure type DMAR:Unknown DMAR structure type DMAR:Unknown DMAR structure type ... Although I am not very familiar with ACPI, to me it looks like struct acpi_dmar_header::length == 0x0058 is incorrect, causing parse_dmar_table() to look at an invalid offset on the next loop. This offset happens to have struct acpi_dmar_header::length == 0x0000, which prevents the loop from ever terminating. This patch checks for this condition and bails out instead of looping forever. Signed-off-by: Tony Battersby Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 8d3e9c261061..26c536b51c5a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -330,6 +330,14 @@ parse_dmar_table(void) entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { + /* Avoid looping forever on bad ACPI tables */ + if (entry_header->length == 0) { + printk(KERN_WARNING PREFIX + "Invalid 0-length structure\n"); + ret = -EINVAL; + break; + } + dmar_table_print_dmar_entry(entry_header); switch (entry_header->type) { From e32740d9786b8a6c54f6e3d670567d9ef57b3b8c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 19 Feb 2009 11:58:37 -0800 Subject: [PATCH 05/49] ALSA: pcxhr.h replace signed one-bit bitfields The usage and comments make it clear values of 1/0 were intended rather than -1/0 Noticed by sparse: 
sound/pci/pcxhr/pcxhr.h:100:20: error: dubious one-bit signed bitfield sound/pci/pcxhr/pcxhr.h:101:22: error: dubious one-bit signed bitfield sound/pci/pcxhr/pcxhr.h:102:24: error: dubious one-bit signed bitfield sound/pci/pcxhr/pcxhr.h:103:21: error: dubious one-bit signed bitfield sound/pci/pcxhr/pcxhr.h:104:25: error: dubious one-bit signed bitfield sound/pci/pcxhr/pcxhr.h:105:20: error: dubious one-bit signed bitfield Signed-off-by: Harvey Harrison Signed-off-by: Takashi Iwai --- sound/pci/pcxhr/pcxhr.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/pcxhr/pcxhr.h b/sound/pci/pcxhr/pcxhr.h index 84131a916c92..69d87dee6995 100644 --- a/sound/pci/pcxhr/pcxhr.h +++ b/sound/pci/pcxhr/pcxhr.h @@ -97,12 +97,12 @@ struct pcxhr_mgr { int capture_chips; int fw_file_set; int firmware_num; - int is_hr_stereo:1; - int board_has_aes1:1; /* if 1 board has AES1 plug and SRC */ - int board_has_analog:1; /* if 0 the board is digital only */ - int board_has_mic:1; /* if 1 the board has microphone input */ - int board_aes_in_192k:1;/* if 1 the aes input plugs do support 192kHz */ - int mono_capture:1; /* if 1 the board does mono capture */ + unsigned int is_hr_stereo:1; + unsigned int board_has_aes1:1; /* if 1 board has AES1 plug and SRC */ + unsigned int board_has_analog:1; /* if 0 the board is digital only */ + unsigned int board_has_mic:1; /* if 1 the board has microphone input */ + unsigned int board_aes_in_192k:1;/* if 1 the aes input plugs do support 192kHz */ + unsigned int mono_capture:1; /* if 1 the board does mono capture */ struct snd_dma_buffer hostport; From 55290e1932102f57ea17e7cff895914c2dbdb4c4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 20 Feb 2009 15:59:01 +0100 Subject: [PATCH 06/49] ALSA: hda - Fix parse of init_verbs sysfs entry Fixed the parse of init_verbs hwdep sysfs entry. Simplified using sscanf. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_hwdep.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c index 482fb0304ca9..4ae51dcb81af 100644 --- a/sound/pci/hda/hda_hwdep.c +++ b/sound/pci/hda/hda_hwdep.c @@ -277,18 +277,19 @@ static ssize_t init_verbs_store(struct device *dev, { struct snd_hwdep *hwdep = dev_get_drvdata(dev); struct hda_codec *codec = hwdep->private_data; - char *p; - struct hda_verb verb, *v; + struct hda_verb *v; + int nid, verb, param; - verb.nid = simple_strtoul(buf, &p, 0); - verb.verb = simple_strtoul(p, &p, 0); - verb.param = simple_strtoul(p, &p, 0); - if (!verb.nid || !verb.verb || !verb.param) + if (sscanf(buf, "%i %i %i", &nid, &verb, &param) != 3) + return -EINVAL; + if (!nid || !verb) return -EINVAL; v = snd_array_new(&codec->init_verbs); if (!v) return -ENOMEM; - *v = verb; + v->nid = nid; + v->verb = verb; + v->param = param; return count; } From 6a63209fc02d5483371f07e4913ee8abad608051 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Feb 2009 11:00:09 -0500 Subject: [PATCH 07/49] Btrfs: add better -ENOSPC handling This is a step in the direction of better -ENOSPC handling. Instead of checking the global bytes counter we check the space_info bytes counters to make sure we have enough space. If we don't we go ahead and try to allocate a new chunk, and then if that fails we return -ENOSPC. This patch adds two counters to btrfs_space_info, bytes_delalloc and bytes_may_use. bytes_delalloc account for extents we've actually setup for delalloc and will be allocated at some point down the line. bytes_may_use is to keep track of how many bytes we may use for delalloc at some point. When we actually set the extent_bit for the delalloc bytes we subtract the reserved bytes from the bytes_may_use counter. This keeps us from not actually being able to allocate space for any delalloc bytes. 
Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 8 ++ fs/btrfs/ctree.h | 40 ++++++-- fs/btrfs/extent-tree.c | 215 ++++++++++++++++++++++++++++++++++++++--- fs/btrfs/file.c | 16 ++- fs/btrfs/inode.c | 62 +++--------- fs/btrfs/ioctl.c | 6 +- 6 files changed, 271 insertions(+), 76 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a8c9693b75ac..72677ce2b74f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -66,6 +66,9 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; + /* the space_info for where this inode's data allocations are done */ + struct btrfs_space_info *space_info; + /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ @@ -94,6 +97,11 @@ struct btrfs_inode { */ u64 delalloc_bytes; + /* total number of bytes that may be used for this inode for + * delalloc + */ + u64 reserved_bytes; + /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 766b31ae3186..82491ba8fa40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,13 +596,27 @@ struct btrfs_block_group_item { struct btrfs_space_info { u64 flags; - u64 total_bytes; - u64 bytes_used; - u64 bytes_pinned; - u64 bytes_reserved; - u64 bytes_readonly; - int full; - int force_alloc; + + u64 total_bytes; /* total bytes in the space */ + u64 bytes_used; /* total bytes used on disk */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + /* delalloc accounting */ + u64 bytes_delalloc; /* number of bytes reserved for allocation, + this space is not necessarily reserved yet + by the allocator */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc */ + + int 
full; /* indicates that we cannot allocate any more + chunks for this space */ + int force_alloc; /* set if we need to force a chunk alloc for + this space */ + struct list_head list; /* for block groups in our same type */ @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes); +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0a5d796c9f7e..e11875e97c2f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root 
*extent_root, u64 alloc_bytes, + u64 flags, int force); + static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { return (cache->flags & bits) == bits; @@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_pinned = 0; found->bytes_reserved = 0; found->bytes_readonly = 0; + found->bytes_delalloc = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return flags; } +static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) +{ + struct btrfs_fs_info *info = root->fs_info; + u64 alloc_profile; + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (root == root->fs_info->chunk_root) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + return btrfs_reduce_alloc_profile(root, data); +} + +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) +{ + u64 alloc_target; + + alloc_target = btrfs_get_alloc_profile(root, 1); + BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, + alloc_target); +} + +/* + * for now this just makes sure we have at least 5% of our metadata space free + * for use. 
+ */ +int btrfs_check_metadata_free_space(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 alloc_target, thresh; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + /* + * if the metadata area isn't maxed out then there is no sense in + * checking how much is used, since we can always allocate a new chunk + */ + if (!meta_sinfo->full) + return 0; + + spin_lock(&meta_sinfo->lock); + thresh = meta_sinfo->total_bytes * 95; + + do_div(thresh, 100); + + if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + spin_unlock(&meta_sinfo->lock); + return -ENOSPC; + } + spin_unlock(&meta_sinfo->lock); + + return 0; +} + +/* + * This will check the space that the inode allocates from to make sure we have + * enough space for bytes. + */ +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + int ret = 0; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; +again: + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + if (data_sinfo->total_bytes - data_sinfo->bytes_used - + data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - + data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - + data_sinfo->bytes_may_use < bytes) { + /* + * if we don't have enough free bytes in this space then we need + * to alloc a new chunk. 
+ */ + if (!data_sinfo->full) { + u64 alloc_target; + struct btrfs_trans_handle *trans; + + data_sinfo->force_alloc = 1; + spin_unlock(&data_sinfo->lock); + + alloc_target = btrfs_get_alloc_profile(root, 1); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + bytes + 2 * 1024 * 1024, + alloc_target, 0); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + goto again; + } + spin_unlock(&data_sinfo->lock); + printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" + ", %llu bytes_used, %llu bytes_reserved, " + "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu total\n", bytes, data_sinfo->bytes_delalloc, + data_sinfo->bytes_used, data_sinfo->bytes_reserved, + data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, + data_sinfo->bytes_may_use, data_sinfo->total_bytes); + return -ENOSPC; + } + data_sinfo->bytes_may_use += bytes; + BTRFS_I(inode)->reserved_bytes += bytes; + spin_unlock(&data_sinfo->lock); + + return btrfs_check_metadata_free_space(root); +} + +/* + * if there was an error for whatever reason after calling + * btrfs_check_data_free_space, call this so we can cleanup the counters. 
+ */ +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + spin_unlock(&data_sinfo->lock); +} + +/* called when we are adding a delalloc extent to the inode's io_tree */ +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* get the space info for where this inode will be storing its data */ + data_sinfo = BTRFS_I(inode)->space_info; + + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_delalloc += bytes; + + /* + * we are adding a delalloc extent without calling + * btrfs_check_data_free_space first. 
This happens on a weird + * writepage condition, but shouldn't hurt our accounting + */ + if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { + data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; + BTRFS_I(inode)->reserved_bytes = 0; + } else { + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + } + + spin_unlock(&data_sinfo->lock); +} + +/* called when we are clearing an delalloc extent from the inode's io_tree */ +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *info; + + info = BTRFS_I(inode)->space_info; + + spin_lock(&info->lock); + info->bytes_delalloc -= bytes; + spin_unlock(&info->lock); +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved), (info->full) ? 
"" : "not "); + printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," + " may_use=%llu, used=%llu\n", info->total_bytes, + info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, + info->bytes_used); down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { @@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, { int ret; u64 search_start = 0; - u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; - if (data) { - alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if (root == root->fs_info->chunk_root) { - alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; - } else { - alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; - } + data = btrfs_get_alloc_profile(root, data); again: - data = btrfs_reduce_alloc_profile(root, data); /* * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 872f104576e5..dc78954861b3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1091,19 +1091,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs); - ret = btrfs_check_free_space(root, write_bytes, 0); + ret = btrfs_check_data_free_space(root, inode, write_bytes); if (ret) goto out; ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); btrfs_drop_pages(pages, 
num_pages); goto out; } @@ -1111,8 +1116,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); btrfs_drop_pages(pages, num_pages); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } if (will_write) { btrfs_fdatawrite_range(inode->i_mapping, pos, @@ -1136,6 +1144,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); + if (ret) + err = ret; out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3cee77ae03c8..7d4f948bc22a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -101,34 +101,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) return err; } -/* - * a very lame attempt at stopping writes when the FS is 85% full. There - * are countless ways this is incorrect, but it is better than nothing. - */ -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del) -{ - u64 total; - u64 used; - u64 thresh; - int ret = 0; - - spin_lock(&root->fs_info->delalloc_lock); - total = btrfs_super_total_bytes(&root->fs_info->super_copy); - used = btrfs_super_bytes_used(&root->fs_info->super_copy); - if (for_del) - thresh = total * 90; - else - thresh = total * 85; - - do_div(thresh, 100); - - if (used + root->fs_info->delalloc_bytes + num_required > thresh) - ret = -ENOSPC; - spin_unlock(&root->fs_info->delalloc_lock); - return ret; -} - /* * this does all the hard work for inserting an inline extent into * the btree. 
The caller should have done a btrfs_drop_extents so that @@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, (unsigned long long)end - start + 1, (unsigned long long) root->fs_info->delalloc_bytes); + btrfs_delalloc_free_space(root, inode, (u64)-1); root->fs_info->delalloc_bytes = 0; BTRFS_I(inode)->delalloc_bytes = 0; } else { + btrfs_delalloc_free_space(root, inode, + end - start + 1); root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } @@ -2245,10 +2221,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2261,7 +2233,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2284,10 +2255,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2304,7 +2271,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) fail_trans: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); if (ret && !err) @@ -2818,7 +2784,7 @@ int btrfs_cont_expand(struct inode *inode, 
loff_t size) if (size <= hole_start) return 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) return err; @@ -3014,6 +2980,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->last_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; + bi->reserved_bytes = 0; bi->disk_i_size = 0; bi->flags = 0; bi->index_cnt = (u64)-1; @@ -3035,6 +3002,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; init_btrfs_i(inode); BTRFS_I(inode)->root = args->root; + btrfs_set_inode_space_info(args->root, inode); return 0; } @@ -3455,6 +3423,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) owner = 0; @@ -3602,7 +3571,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; @@ -3665,7 +3634,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; trans = btrfs_start_transaction(root, 1); @@ -3733,7 +3702,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return -ENOENT; btrfs_inc_nlink(inode); - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; err = btrfs_set_inode_index(dir, &index); @@ -3779,7 +3748,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_unlock; @@ -4336,7 +4305,7 @@ int btrfs_page_mkwrite(struct 
vm_area_struct *vma, struct page *page) u64 page_start; u64 page_end; - ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) goto out; @@ -4349,6 +4318,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) if ((page->mapping != inode->i_mapping) || (page_start >= size)) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); /* page got truncated out from underneath us */ goto out_unlock; } @@ -4631,7 +4601,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -EXDEV; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto out_unlock; @@ -4749,7 +4719,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 988fdc8b49eb..bca729fc80c8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -70,7 +70,7 @@ static noinline int create_subvol(struct btrfs_root *root, u64 index = 0; unsigned long nr = 1; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_commit; @@ -203,7 +203,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (!root->ref_cows) return -EINVAL; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_unlock; @@ -374,7 +374,7 @@ static int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - ret = btrfs_check_free_space(root, inode->i_size, 0); + ret = btrfs_check_data_free_space(root, inode, inode->i_size); if (ret) return -ENOSPC; From 4e06bdd6cbd5105376e7caf4e683ed131e777389 Mon Sep 17 00:00:00 2001 From: Josef 
Bacik Date: Fri, 20 Feb 2009 10:59:53 -0500 Subject: [PATCH 08/49] Btrfs: try committing transaction before returning ENOSPC This fixes a problem where we could return -ENOSPC when we may actually have plenty of space, the space is just pinned. Instead of returning -ENOSPC immediately, commit the transaction first and then try and do the allocation again. This patch also does chunk allocation for metadata if we pass the 80% threshold for metadata space. This will help with stack usage since the chunk allocation will happen early on, instead of when the allocation is happening. Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e11875e97c2f..6b5966aacf44 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2017,26 +2017,49 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root) struct btrfs_fs_info *info = root->fs_info; struct btrfs_space_info *meta_sinfo; u64 alloc_target, thresh; + int committed = 0, ret; /* get the space info for where the metadata will live */ alloc_target = btrfs_get_alloc_profile(root, 0); meta_sinfo = __find_space_info(info, alloc_target); - /* - * if the metadata area isn't maxed out then there is no sense in - * checking how much is used, since we can always allocate a new chunk - */ - if (!meta_sinfo->full) - return 0; - +again: spin_lock(&meta_sinfo->lock); - thresh = meta_sinfo->total_bytes * 95; + if (!meta_sinfo->full) + thresh = meta_sinfo->total_bytes * 80; + else + thresh = meta_sinfo->total_bytes * 95; do_div(thresh, 100); if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + struct btrfs_trans_handle *trans; + if (!meta_sinfo->full) { + meta_sinfo->force_alloc = 1; + spin_unlock(&meta_sinfo->lock); + + trans = btrfs_start_transaction(root, 1); + if (!trans) + return 
-ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + 2 * 1024 * 1024, alloc_target, 0); + btrfs_end_transaction(trans, root); + goto again; + } spin_unlock(&meta_sinfo->lock); + + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } return -ENOSPC; } spin_unlock(&meta_sinfo->lock); @@ -2052,7 +2075,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; - int ret = 0; + int ret = 0, committed = 0; /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); @@ -2065,13 +2088,14 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - data_sinfo->bytes_may_use < bytes) { + struct btrfs_trans_handle *trans; + /* * if we don't have enough free bytes in this space then we need * to alloc a new chunk. 
*/ if (!data_sinfo->full) { u64 alloc_target; - struct btrfs_trans_handle *trans; data_sinfo->force_alloc = 1; spin_unlock(&data_sinfo->lock); @@ -2090,6 +2114,19 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, goto again; } spin_unlock(&data_sinfo->lock); + + /* commit the current transaction and try again */ + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } + printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " "%llu bytes_pinned, %llu bytes_readonly, %llu may use" From e8bf069c419c1dc0657e02636441fe1179a9db14 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Sun, 22 Feb 2009 14:42:54 +0200 Subject: [PATCH 09/49] ALSA: aw2: do not grab every saa7146 based device Audiowerk2 driver snd-aw2 is bound to any saa7146 device as it does not check subsystem ids. Many DVB devices are saa7146 based, so aw2 driver grabs them as well. According to http://lkml.org/lkml/2008/10/15/311 aw2 devices have the subsystem ids set to 0, the saa7146 default. Fix conflicts with DVB devices by checking for subsystem ids = 0 specifically. 
Signed-off-by: Anssi Hannula Signed-off-by: Takashi Iwai --- sound/pci/aw2/aw2-alsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index 3f00ddf450f8..c7c54e7748e9 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -165,7 +165,7 @@ module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Audiowerk2 soundcard."); static struct pci_device_id snd_aw2_ids[] = { - {PCI_VENDOR_ID_SAA7146, PCI_DEVICE_ID_SAA7146, PCI_ANY_ID, PCI_ANY_ID, + {PCI_VENDOR_ID_SAA7146, PCI_DEVICE_ID_SAA7146, 0, 0, 0, 0, 0}, {0} }; From 5370d96f85962769ea3df3a81cc885f257c51589 Mon Sep 17 00:00:00 2001 From: Steve Chen Date: Sat, 21 Feb 2009 08:05:04 -0600 Subject: [PATCH 10/49] ALSA: fix excessive background noise introduced by OSS emulation rate shrink Incorrect variable was used to get the next sample which caused S2 to be stuck with the same value resulting in loud background noise. Signed-off-by: Steve Chen Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/oss/rate.c b/sound/core/oss/rate.c index a466443c4a26..2fa9299a440d 100644 --- a/sound/core/oss/rate.c +++ b/sound/core/oss/rate.c @@ -157,7 +157,7 @@ static void resample_shrink(struct snd_pcm_plugin *plugin, while (dst_frames1 > 0) { S1 = S2; if (src_frames1-- > 0) { - S1 = *src; + S2 = *src; src += src_step; } if (pos & ~R_MASK) { From 2d4663816064fabb68935f920bbd7ccdc7f9392d Mon Sep 17 00:00:00 2001 From: Luke Yelavich Date: Mon, 23 Feb 2009 13:00:33 +1100 Subject: [PATCH 11/49] ALSA: hda - add another MacBook Pro 3,1 SSID Reference: Ubuntu bug #332456 https://bugs.launchpad.net/bugs/332456 Signed-off-by: Luke Yelavich Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ed8fcbd60003..f6571224b34e 100644 ---
a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7017,6 +7017,7 @@ static int patch_alc882(struct hda_codec *codec) case 0x106b3e00: /* iMac 24 Aluminium */ board_config = ALC885_IMAC24; break; + case 0x106b00a0: /* MacBookPro3,1 - Another revision */ case 0x106b00a1: /* Macbook (might be wrong - PCI SSID?) */ case 0x106b00a4: /* MacbookPro4,1 */ case 0x106b2c00: /* Macbook Pro rev3 */ From cc374c477c9bf95f409fed16426856d86a97394f Mon Sep 17 00:00:00 2001 From: Juan Jesus Garcia de Soria Date: Mon, 23 Feb 2009 08:11:59 +0100 Subject: [PATCH 12/49] ALSA: hda - Quirk for Acer Aspire 6530G The Acer Aspire 6530G needs the 4930G "model" for the front mic to work properly. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f6571224b34e..a680be0d4534 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8470,6 +8470,8 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = { ALC888_ACER_ASPIRE_4930G), SND_PCI_QUIRK(0x1025, 0x015e, "Acer Aspire 6930G", ALC888_ACER_ASPIRE_4930G), + SND_PCI_QUIRK(0x1025, 0x0166, "Acer Aspire 6530G", + ALC888_ACER_ASPIRE_4930G), SND_PCI_QUIRK(0x1025, 0, "Acer laptop", ALC883_ACER), /* default Acer */ SND_PCI_QUIRK(0x1028, 0x020d, "Dell Inspiron 530", ALC888_6ST_DELL), SND_PCI_QUIRK(0x103c, 0x2a3d, "HP Pavillion", ALC883_6ST_DIG), From 1f9da5544073d38e05139f8ce9da24e78653c73e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Feb 2009 15:31:02 +0100 Subject: [PATCH 13/49] ALSA: emu10k1 - Fix digital/analog switch on audigy2 ZS Fix the inverted logic of shared spdif switch. 
Reference: Novell bnc#478496 https://bugzilla.novell.com/show_bug.cgi?id=478496 Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 7958006a1d66..101a1c13a20d 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1528,6 +1528,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10021102, .driver = "Audigy2", .name = "SB Audigy 2 Platinum [SB0240P]", From 09b4068a7fe442efc40e9dcbcf5ff37c3338ab15 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2009 13:18:47 +1100 Subject: [PATCH 14/49] md/raid10: Don't skip more than 1 bitmap-chunk at a time during recovery. When doing recovery on a raid10 with a write-intent bitmap, we only need to recover chunks that are flagged in the bitmap. However if we choose to skip a chunk as it isn't flagged, the code currently skips the whole raid10-chunk, thus it might not recover some blocks that need recovering. This patch fixes it. In case that is confusing, it might help to understand that there is a 'raid10 chunk size' which guides how data is distributed across the devices, and a 'bitmap chunk size' which says how much data corresponds to a single bit in the bitmap. This bug only affects cases where the bitmap chunk size is smaller than the raid10 chunk size.
Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid10.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6736d6dff981..118f89e716ea 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2010,13 +2010,13 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i /* There is nowhere to write, so all non-sync * drives must be failed, so try the next chunk... */ - { - sector_t sec = max_sector - sector_nr; - sectors_skipped += sec; + if (sector_nr + max_sync < max_sector) + max_sector = sector_nr + max_sync; + + sectors_skipped += (max_sector - sector_nr); chunks_skipped ++; sector_nr = max_sector; goto skipped; - } } static int run(mddev_t *mddev) From 78200d45cde2a79c0d0ae0407883bb264caa3c18 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2009 13:18:47 +1100 Subject: [PATCH 15/49] md/raid10: Don't call bitmap_cond_end_sync when we are doing recovery. For raid1/4/5/6, resync (fixing inconsistencies between devices) is very similar to recovery (rebuilding a failed device onto a spare). They both walk through the device addresses in order. For raid10 it can be quite different. Resync follows the 'array' address, and makes sure all copies are the same. Recovery walks through 'device' addresses and recreates each missing block. The 'bitmap_cond_end_sync' function allows the write-intent-bitmap (when present) to be updated to reflect a partially completed resync. It makes assumptions which mean that it does not work correctly for raid10 recovery at all. In particular, it can cause bitmap-directed recovery of a raid10 to not recover some of the blocks that need to be recovered. So move the call to bitmap_cond_end_sync into the resync path, rather than being in the common "resync or recovery" path.
Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 118f89e716ea..e1feb87afc6c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1749,8 +1749,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - bitmap_cond_end_sync(mddev->bitmap, sector_nr); - /* Again, very different code for resync and recovery. * Both must result in an r10bio with a list of bios that * have bi_end_io, bi_sector, bi_bdev set, @@ -1886,6 +1884,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i /* resync. Schedule a read for every block at this virt offset */ int count = 0; + bitmap_cond_end_sync(mddev->bitmap, sector_nr); + if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { From 73d5c38a9536142e062c35997b044e89166e063b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2009 13:18:47 +1100 Subject: [PATCH 16/49] md: avoid races when stopping resync. There has been a race in raid10 and raid1 for a long time which has only recently started showing up due to a scheduler change. When a sync_read request finishes, as soon as reschedule_retry is called, another thread can mark the resync request as having completed, so md_do_sync can finish, ->stop can be called, and ->conf can be freed. So using conf after reschedule_retry is not safe. Similarly, when finishing a sync_write, calling md_done_sync must be the last thing we do, as it allows a chain of events which will free conf and other data structures.
The first of these requires action in raid10.c The second requires action in raid1.c and raid10.c Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid1.c | 3 ++- drivers/md/raid10.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 01e3cffd03b8..e2466425d9ca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1237,8 +1237,9 @@ static void end_sync_write(struct bio *bio, int error) update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { - md_done_sync(mddev, r1_bio->sectors, uptodate); + sector_t s = r1_bio->sectors; put_buf(r1_bio); + md_done_sync(mddev, s, uptodate); } } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e1feb87afc6c..7301631abe04 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1236,6 +1236,7 @@ static void end_sync_read(struct bio *bio, int error) /* for reconstruct, we always reschedule after a read. * for resync, only after all reads */ + rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); if (test_bit(R10BIO_IsRecover, &r10_bio->state) || atomic_dec_and_test(&r10_bio->remaining)) { /* we have read all the blocks, @@ -1243,7 +1244,6 @@ static void end_sync_read(struct bio *bio, int error) */ reschedule_retry(r10_bio); } - rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); } static void end_sync_write(struct bio *bio, int error) @@ -1264,11 +1264,13 @@ static void end_sync_write(struct bio *bio, int error) update_head_pos(i, r10_bio); + rdev_dec_pending(conf->mirrors[d].rdev, mddev); while (atomic_dec_and_test(&r10_bio->remaining)) { if (r10_bio->master_bio == NULL) { /* the primary of several recovery bios */ - md_done_sync(mddev, r10_bio->sectors, 1); + sector_t s = r10_bio->sectors; put_buf(r10_bio); + md_done_sync(mddev, s, 1); break; } else { r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio; @@ -1276,7 +1278,6 @@ static void end_sync_write(struct bio *bio, int error) r10_bio = r10_bio2; } } - 
rdev_dec_pending(conf->mirrors[d].rdev, mddev); } /* From 7c04d1d97a8d918b7ae2ef478229862b71a65f06 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 23 Feb 2009 15:36:40 -0800 Subject: [PATCH 17/49] drm/i915: remove PLL debugging messages These are normal; we walk through different values looking for the right one, so why flood the screen with messages? Signed-off-by: Jesse Barnes Reviewed-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 65b635ce28c8..a2834276cb38 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -217,7 +217,7 @@ bool intel_pipe_has_type (struct drm_crtc *crtc, int type) return false; } -#define INTELPllInvalid(s) do { DRM_DEBUG(s); return false; } while (0) +#define INTELPllInvalid(s) do { /* DRM_DEBUG(s); */ return false; } while (0) /** * Returns whether the given set of divisors are valid for a given refclk with * the given connectors. From 37df96736bfe6f5fd9a141d62946e1083d73e712 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 23 Feb 2009 15:36:42 -0800 Subject: [PATCH 18/49] drm/i915: handle bogus VBT panel timing We've seen cases in the wild where the VBT sync data is wrong, so add some code to fix it up in that case, taking care to make sure that the total is greater than the sync end. 
Signed-off-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_bios.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 65be30dccc77..fc28e2bbd542 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -111,6 +111,12 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) panel_fixed_mode->clock = dvo_timing->clock * 10; panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; + /* Some VBTs have bogus h/vtotal values */ + if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) + panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; + if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) + panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1; + drm_mode_set_name(panel_fixed_mode); dev_priv->vbt_mode = panel_fixed_mode; From c8766ac5933d6ee75e7ce379a1eb5ceb451fcb83 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Feb 2009 08:44:33 -0800 Subject: [PATCH 19/49] drm: Fix shifts of EDID vsync offset/width fields. 
Signed-off-by: Linus Torvalds Reviewed-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 5a4d3244758a..e902b1cefc06 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -320,10 +320,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, mode->htotal = mode->hdisplay + ((pt->hblank_hi << 8) | pt->hblank_lo); mode->vdisplay = (pt->vactive_hi << 8) | pt->vactive_lo; - mode->vsync_start = mode->vdisplay + ((pt->vsync_offset_hi << 8) | + mode->vsync_start = mode->vdisplay + ((pt->vsync_offset_hi << 4) | pt->vsync_offset_lo); mode->vsync_end = mode->vsync_start + - ((pt->vsync_pulse_width_hi << 8) | + ((pt->vsync_pulse_width_hi << 4) | pt->vsync_pulse_width_lo); mode->vtotal = mode->vdisplay + ((pt->vblank_hi << 8) | pt->vblank_lo); From fe56cf45f951b3810313584605c1d8a4f20b33a4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 23 Feb 2009 15:36:41 -0800 Subject: [PATCH 20/49] drm: Fix ordering of bit fields in EDID structure leading huge vsync values. 
Signed-off-by: Jesse Barnes Reviewed-by: Eric Anholt Signed-off-by: Dave Airlie --- include/drm/drm_edid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index c707c15f5164..ff8d27af4786 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -58,10 +58,10 @@ struct detailed_pixel_timing { u8 hsync_pulse_width_lo; u8 vsync_pulse_width_lo:4; u8 vsync_offset_lo:4; - u8 hsync_pulse_width_hi:2; - u8 hsync_offset_hi:2; u8 vsync_pulse_width_hi:2; u8 vsync_offset_hi:2; + u8 hsync_pulse_width_hi:2; + u8 hsync_offset_hi:2; u8 width_mm_lo; u8 height_mm_lo; u8 height_mm_hi:4; From 7bec756c74b1a5079d5074144bb77a6b3e7d7783 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 23 Feb 2009 16:09:34 -0800 Subject: [PATCH 21/49] drm: disable encoders before re-routing them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases we may receive a mode config that has a different CRTC<->encoder map than the current configuration. In that case, we need to disable any re-routed encoders before setting the mode, otherwise they may not pick up the new CRTC (if the output types are incompatible for example). Tested-by: Kristian Høgsberg Signed-off-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 76 +++++++++++++++++++++++++++++-- include/drm/drm_crtc_helper.h | 1 + 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 733028b4d45e..1c3a8c557140 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -452,6 +452,59 @@ static void drm_setup_crtcs(struct drm_device *dev) kfree(modes); kfree(enabled); } + +/** + * drm_encoder_crtc_ok - can a given crtc drive a given encoder? + * @encoder: encoder to test + * @crtc: crtc to test + * + * Return false if @encoder can't be driven by @crtc, true otherwise.
+ */ +static bool drm_encoder_crtc_ok(struct drm_encoder *encoder, + struct drm_crtc *crtc) +{ + struct drm_device *dev; + struct drm_crtc *tmp; + int crtc_mask = 1; + + WARN(!crtc, "checking null crtc?"); + + dev = crtc->dev; + + list_for_each_entry(tmp, &dev->mode_config.crtc_list, head) { + if (tmp == crtc) + break; + crtc_mask <<= 1; + } + + if (encoder->possible_crtcs & crtc_mask) + return true; + return false; +} + +/* + * Check the CRTC we're going to map each output to vs. its current + * CRTC. If they don't match, we have to disable the output and the CRTC + * since the driver will have to re-route things. + */ +static void +drm_crtc_prepare_encoders(struct drm_device *dev) +{ + struct drm_encoder_helper_funcs *encoder_funcs; + struct drm_encoder *encoder; + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + encoder_funcs = encoder->helper_private; + /* Disable unused encoders */ + if (encoder->crtc == NULL) + (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + /* Disable encoders whose CRTC is about to change */ + if (encoder_funcs->get_crtc && + encoder->crtc != (*encoder_funcs->get_crtc)(encoder)) + (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + } +} + /** * drm_crtc_set_mode - set a mode * @crtc: CRTC to program @@ -547,6 +600,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, encoder_funcs->prepare(encoder); } + drm_crtc_prepare_encoders(dev); + crtc_funcs->prepare(crtc); /* Set up the DPLL and any encoders state that needs to adjust or depend @@ -617,7 +672,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) struct drm_device *dev; struct drm_crtc **save_crtcs, *new_crtc; struct drm_encoder **save_encoders, *new_encoder; - struct drm_framebuffer *old_fb; + struct drm_framebuffer *old_fb = NULL; bool save_enabled; bool mode_changed = false; bool fb_changed = false; @@ -668,9 +723,10 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) * and then just flip_or_move it */ if (set->crtc->fb != 
set->fb) { /* If we have no fb then treat it as a full mode set */ - if (set->crtc->fb == NULL) + if (set->crtc->fb == NULL) { + DRM_DEBUG("crtc has no fb, full mode set\n"); mode_changed = true; - else if ((set->fb->bits_per_pixel != + } else if ((set->fb->bits_per_pixel != set->crtc->fb->bits_per_pixel) || set->fb->depth != set->crtc->fb->depth) fb_changed = true; @@ -682,7 +738,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) fb_changed = true; if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) { - DRM_DEBUG("modes are different\n"); + DRM_DEBUG("modes are different, full mode set\n"); drm_mode_debug_printmodeline(&set->crtc->mode); drm_mode_debug_printmodeline(set->mode); mode_changed = true; @@ -708,6 +764,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } if (new_encoder != connector->encoder) { + DRM_DEBUG("encoder changed, full mode switch\n"); mode_changed = true; connector->encoder = new_encoder; } @@ -734,10 +791,20 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) if (set->connectors[ro] == connector) new_crtc = set->crtc; } + + /* Make sure the new CRTC will work with the encoder */ + if (new_crtc && + !drm_encoder_crtc_ok(connector->encoder, new_crtc)) { + ret = -EINVAL; + goto fail_set_mode; + } if (new_crtc != connector->encoder->crtc) { + DRM_DEBUG("crtc changed, full mode switch\n"); mode_changed = true; connector->encoder->crtc = new_crtc; } + DRM_DEBUG("setting connector %d crtc to %p\n", + connector->base.id, new_crtc); } /* mode_set_base is not a required function */ @@ -781,6 +848,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) fail_set_mode: set->crtc->enabled = save_enabled; + set->crtc->fb = old_fb; count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (!connector->encoder) diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 0b0d236c2154..c7d4b2e606a5 100644 --- a/include/drm/drm_crtc_helper.h +++ 
b/include/drm/drm_crtc_helper.h @@ -76,6 +76,7 @@ struct drm_encoder_helper_funcs { void (*mode_set)(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); + struct drm_crtc *(*get_crtc)(struct drm_encoder *encoder); /* detect for DAC style encoders */ enum drm_connector_status (*detect)(struct drm_encoder *encoder, struct drm_connector *connector); From b3f5e7329df1a508ac58ebe7509fb7a47b9eab6a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Feb 2009 14:48:22 +0000 Subject: [PATCH 22/49] drm: Correct unbalanced drm_vblank_put() during mode setting. The first time we install a mode, the vblank will be disabled for a pipe and so drm_vblank_get() in drm_vblank_pre_modeset() will fail. As we unconditionally call drm_vblank_put() afterwards, the vblank reference counter becomes unbalanced. Signed-off-by: Chris Wilson Acked-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 3795dbc0f50c..93e677a481f5 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -435,6 +435,8 @@ EXPORT_SYMBOL(drm_vblank_get); */ void drm_vblank_put(struct drm_device *dev, int crtc) { + BUG_ON (atomic_read (&dev->vblank_refcount[crtc]) == 0); + /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&dev->vblank_refcount[crtc])) mod_timer(&dev->vblank_disable_timer, jiffies + 5*DRM_HZ); @@ -460,8 +462,9 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) * so that interrupts remain enabled in the interim. 
*/ if (!dev->vblank_inmodeset[crtc]) { - dev->vblank_inmodeset[crtc] = 1; - drm_vblank_get(dev, crtc); + dev->vblank_inmodeset[crtc] = 0x1; + if (drm_vblank_get(dev, crtc) == 0) + dev->vblank_inmodeset[crtc] |= 0x2; } } EXPORT_SYMBOL(drm_vblank_pre_modeset); @@ -473,9 +476,12 @@ void drm_vblank_post_modeset(struct drm_device *dev, int crtc) if (dev->vblank_inmodeset[crtc]) { spin_lock_irqsave(&dev->vbl_lock, irqflags); dev->vblank_disable_allowed = 1; - dev->vblank_inmodeset[crtc] = 0; spin_unlock_irqrestore(&dev->vbl_lock, irqflags); - drm_vblank_put(dev, crtc); + + if (dev->vblank_inmodeset[crtc] & 0x2) + drm_vblank_put(dev, crtc); + + dev->vblank_inmodeset[crtc] = 0; } } EXPORT_SYMBOL(drm_vblank_post_modeset); From d61e7380b402a481ab1fa8027068a24918f701c8 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 24 Feb 2009 20:31:53 -0500 Subject: [PATCH 23/49] drm: edid revision 0 is valid edid->revision == 0 should be valid (at least, so the error message indicates. :) and wikipedia seems to indicate that EDID 1.0 existed. We can dump the entire check, since edid->revision is a u8, so it can't ever be less than 0. Marko reports in RH bz#476735 that his monitor claims to be EDID 1.0, and therefore hits the check and is stuck at 800x600 because of it. 
Reported-by: Marko Ristola Signed-off-by: Kyle McMartin Acked-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index e902b1cefc06..a839a28d8ee6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -125,7 +125,7 @@ static bool edid_is_valid(struct edid *edid) DRM_ERROR("EDID has major version %d, instead of 1\n", edid->version); goto bad; } - if (edid->revision <= 0 || edid->revision > 3) { + if (edid->revision > 3) { DRM_ERROR("EDID has minor version %d, which is not between 0-3\n", edid->revision); goto bad; } From dd0910b3c71b253c08111110f0399b924a8d5853 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 25 Feb 2009 14:49:21 +1000 Subject: [PATCH 24/49] drm/i915: make hw page ioremap use ioremap_wc However we still have another issue with ioremap_wc not falling back properly or somehow doing something else stupid, this probably needs to be tracked down. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2d797ffe8137..cc4649577a61 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -811,7 +811,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data, dev_priv->hws_map.flags = 0; dev_priv->hws_map.mtrr = 0; - drm_core_ioremap(&dev_priv->hws_map, dev); + drm_core_ioremap_wc(&dev_priv->hws_map, dev); if (dev_priv->hws_map.handle == NULL) { i915_dma_cleanup(dev); dev_priv->status_gfx_addr = 0; From e08fb4f6d1dc95eff5b3fc1d0412bcb5afcae7f2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 25 Feb 2009 14:52:30 +1000 Subject: [PATCH 25/49] drm/i915: convert DRM_ERROR to DRM_DEBUG in phys object pwrite path This snuck in when I wrote phys object support. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 28b726d07a0c..85685bfd12da 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3548,7 +3548,7 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, user_data = (char __user *) (uintptr_t) args->data_ptr; obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset; - DRM_ERROR("obj_addr %p, %lld\n", obj_addr, args->size); + DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size); ret = copy_from_user(obj_addr, user_data, args->size); if (ret) return -EFAULT; From 6aa03ab06978e97b3e0720f83280d7841051916b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 25 Feb 2009 14:06:26 +0900 Subject: [PATCH 26/49] Fix iwlan DMA mapping direction When iwlan runs on IOMMU, IOMMU generates a lot of PTE write faults because PTE write bit is not set on some of PTE's. This is because iwlan driver calls DMA mapping with PCI_DMA_TODEVICE which is read only in mapping PTE. But iwlan device actually writes to the mapped page to update its contents. This issue is not exposed in swiotlb. But VT-d hardware can capture this fault and stop the fault transaction. The following patch fixes the issue. 
Signed-off-by: Fenghua Yu Reviewed-by: Bhavesh Davda Tested-by: Chris Wright Acked-by: Tomas Winkler Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- drivers/net/wireless/iwlwifi/iwl-tx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index b0ee86c62685..ab13ff22a8c0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -148,7 +148,7 @@ static void iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq) pci_unmap_single(dev, pci_unmap_addr(&txq->cmd[index]->meta, mapping), pci_unmap_len(&txq->cmd[index]->meta, len), - PCI_DMA_TODEVICE); + PCI_DMA_BIDIRECTIONAL); /* Unmap chunks, if any. */ for (i = 1; i < num_tbs; i++) { @@ -964,7 +964,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) * within command buffer array. */ txcmd_phys = pci_map_single(priv->pci_dev, out_cmd, sizeof(struct iwl_cmd), - PCI_DMA_TODEVICE); + PCI_DMA_BIDIRECTIONAL); pci_unmap_addr_set(&out_cmd->meta, mapping, txcmd_phys); pci_unmap_len_set(&out_cmd->meta, len, sizeof(struct iwl_cmd)); /* Add buffer containing Tx command and MAC(!) 
header to TFD's @@ -1115,7 +1115,7 @@ int iwl_enqueue_hcmd(struct iwl_priv *priv, struct iwl_host_cmd *cmd) IWL_MAX_SCAN_SIZE : sizeof(struct iwl_cmd); phys_addr = pci_map_single(priv->pci_dev, out_cmd, - len, PCI_DMA_TODEVICE); + len, PCI_DMA_BIDIRECTIONAL); pci_unmap_addr_set(&out_cmd->meta, mapping, phys_addr); pci_unmap_len_set(&out_cmd->meta, len, len); phys_addr += offsetof(struct iwl_cmd, hdr); @@ -1212,7 +1212,7 @@ static void iwl_hcmd_queue_reclaim(struct iwl_priv *priv, int txq_id, pci_unmap_single(priv->pci_dev, pci_unmap_addr(&txq->cmd[cmd_idx]->meta, mapping), pci_unmap_len(&txq->cmd[cmd_idx]->meta, len), - PCI_DMA_TODEVICE); + PCI_DMA_BIDIRECTIONAL); for (idx = iwl_queue_inc_wrap(idx, q->n_bd); q->read_ptr != idx; q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) { From 0af80c04e2f2e45ae09fceb17df8050f828a5c40 Mon Sep 17 00:00:00 2001 From: David Fries Date: Wed, 25 Feb 2009 20:28:21 +0100 Subject: [PATCH 27/49] ide: ide.c 'clear' fix, update "ide=nodma" documentation Documentation/kernel-parameters.txt - ide=nodma is no longer valid. drivers/ide/Kconfig - The module is ide-core.ko not ide. drivers/ide/ide.c - It took me a while to figure out what the arguments %d.%d:%d to nodma module parameter meant, so I added a comment to each. - Added a comment to each of the sscanf lines. - There is a bug, if j is 0 it would previously clear all the other bits except the current device, changed in three different places. mask &= (1 << i) should be mask &= ~(1 << i). 
Signed-off-by: David Fries [bart: s/disk/device/ in ide.c, beautify patch description] Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/kernel-parameters.txt | 6 ++++-- drivers/ide/Kconfig | 2 +- drivers/ide/ide.c | 11 ++++++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 319785b6dcb1..0ed3234125e3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -868,8 +868,10 @@ and is between 256 and 4096 characters. It is defined in the file icn= [HW,ISDN] Format: [,[,[,]]] - ide= [HW] (E)IDE subsystem - Format: ide=nodma or ide=doubler + ide-core.nodma= [HW] (E)IDE subsystem + Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc + .vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom + .chs .ignore_cable are additional options See Documentation/ide/ide.txt. idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 3dad2299d9c5..e072903b12f0 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -46,7 +46,7 @@ menuconfig IDE SMART parameters from disk drives. To compile this driver as a module, choose M here: the - module will be called ide. + module will be called ide-core.ko. For further information, please read . diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 258805da15c3..0920e3b0c962 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -337,6 +337,7 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) int a, b, i, j = 1; unsigned int *dev_param_mask = (unsigned int *)kp->arg; + /* controller . 
device (0 or 1) [ : 1 (set) | 0 (clear) ] */ if (sscanf(s, "%d.%d:%d", &a, &b, &j) != 3 && sscanf(s, "%d.%d", &a, &b) != 2) return -EINVAL; @@ -349,7 +350,7 @@ static int ide_set_dev_param_mask(const char *s, struct kernel_param *kp) if (j) *dev_param_mask |= (1 << i); else - *dev_param_mask &= (1 << i); + *dev_param_mask &= ~(1 << i); return 0; } @@ -392,6 +393,8 @@ static int ide_set_disk_chs(const char *str, struct kernel_param *kp) { int a, b, c = 0, h = 0, s = 0, i, j = 1; + /* controller . device (0 or 1) : Cylinders , Heads , Sectors */ + /* controller . device (0 or 1) : 1 (use CHS) | 0 (ignore CHS) */ if (sscanf(str, "%d.%d:%d,%d,%d", &a, &b, &c, &h, &s) != 5 && sscanf(str, "%d.%d:%d", &a, &b, &j) != 3) return -EINVAL; @@ -407,7 +410,7 @@ static int ide_set_disk_chs(const char *str, struct kernel_param *kp) if (j) ide_disks |= (1 << i); else - ide_disks &= (1 << i); + ide_disks &= ~(1 << i); ide_disks_chs[i].cyl = c; ide_disks_chs[i].head = h; @@ -469,6 +472,8 @@ static int ide_set_ignore_cable(const char *s, struct kernel_param *kp) { int i, j = 1; + /* controller (ignore) */ + /* controller : 1 (ignore) | 0 (use) */ if (sscanf(s, "%d:%d", &i, &j) != 2 && sscanf(s, "%d", &i) != 1) return -EINVAL; @@ -478,7 +483,7 @@ static int ide_set_ignore_cable(const char *s, struct kernel_param *kp) if (j) ide_ignore_cable |= (1 << i); else - ide_ignore_cable &= (1 << i); + ide_ignore_cable &= ~(1 << i); return 0; } From 43a12216d3664a9fa6c8ceb398da6ef08fee7ff7 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 25 Feb 2009 20:28:22 +0100 Subject: [PATCH 28/49] amd74xx: device/vendor confusion Device and vendor ids were confused Signed-off-by: Roel Kluin Cc: Andrew Morton Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/amd74xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index 69660a431cd9..77267c859965 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c 
@@ -166,7 +166,7 @@ static unsigned int init_chipset_amd74xx(struct pci_dev *dev) * Check for broken FIFO support. */ if (dev->vendor == PCI_VENDOR_ID_AMD && - dev->vendor == PCI_DEVICE_ID_AMD_VIPER_7411) + dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) t &= 0x0f; else t |= 0xf0; From f76bee16fc83f58d6c1b088977330f26ed7ae248 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 25 Feb 2009 20:28:22 +0100 Subject: [PATCH 29/49] atiixp: fix missing parentheses Fix missing parentheses so PIO/DMA timings for master device on the second channel are programmed correctly (IOW "8 0 24 16" offset values should be used instead of the current "8 0 16 16"). [ The bug went unnoticed because after PIO/DMA timings get programmed incorrectly for the third device they are overwritten with timings for the fourth device and since BIOS should also program timings for the third device everything should work fine until suspend/resume cycle or user requested transfer mode changes. ] Signed-off-by: Roel Kluin Cc: Sergei Shtylyov Cc: Andrew Morton [bart: update patch description] Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/atiixp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index b2735d28f5cc..ecd1e62ca91a 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -52,7 +52,7 @@ static void atiixp_set_pio_mode(ide_drive_t *drive, const u8 pio) { struct pci_dev *dev = to_pci_dev(drive->hwif->dev); unsigned long flags; - int timing_shift = (drive->dn & 2) ? 16 : 0 + (drive->dn & 1) ? 0 : 8; + int timing_shift = (drive->dn ^ 1) * 8; u32 pio_timing_data; u16 pio_mode_data; @@ -85,7 +85,7 @@ static void atiixp_set_dma_mode(ide_drive_t *drive, const u8 speed) { struct pci_dev *dev = to_pci_dev(drive->hwif->dev); unsigned long flags; - int timing_shift = (drive->dn & 2) ? 16 : 0 + (drive->dn & 1) ? 
0 : 8; + int timing_shift = (drive->dn ^ 1) * 8; u32 tmp32; u16 tmp16; u16 udma_ctl = 0; From f38344b0a0898d2a8c13581ee61007719e16e1d7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Feb 2009 20:28:22 +0100 Subject: [PATCH 30/49] it821x: remove dead URL Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/it821x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index e1c4f5437396..13b8153112ed 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -5,9 +5,8 @@ * May be copied or modified under the terms of the GNU General Public License * Based in part on the ITE vendor provided SCSI driver. * - * Documentation available from - * http://www.ite.com.tw/pc/IT8212F_V04.pdf - * Some other documents are NDA. + * Documentation: + * Datasheet is freely available, some other documents under NDA. * * The ITE8212 isn't exactly a standard IDE controller. It has two * modes. In pass through mode then it is an IDE controller. In its smart From d3dd7107f4d843d0f01d0f77d49a7c5449130577 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Feb 2009 20:28:23 +0100 Subject: [PATCH 31/49] ide-cd: document capacity hack Just copy the comment from drivers/scsi/sr.c::sr_done() (from which the capacity hack has been originated). Cc: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 0bfeb0c79d6e..690475b834de 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -194,6 +194,14 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, bio_sectors = max(bio_sectors(failed_command->bio), 4U); sector &= ~(bio_sectors - 1); + /* + * The SCSI specification allows for the value + * returned by READ CAPACITY to be up to 75 2K + * sectors past the last readable block. 
+ * Therefore, if we hit a medium error within the + * last 75 2K sectors, we decrease the saved size + * value. + */ if (sector < get_capacity(info->disk) && drive->probed_capacity - sector < 4 * 75) set_capacity(info->disk, sector); From 8fed43684174b68f04d01d1210fd00536af790df Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Feb 2009 20:28:24 +0100 Subject: [PATCH 32/49] ide: fix refcounting in device drivers During host driver module removal del_gendisk() results in a final put on drive->gendev and freeing the drive by drive_release_dev(). Convert device drivers from using struct kref to use struct device so device driver's object holds reference on ->gendev and prevents drive from prematurely going away. Also fix ->remove methods to not erroneously drop reference on a host driver by using only put_device() instead of ide*_put(). Reported-by: Stanislaw Gruszka Tested-by: Stanislaw Gruszka Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 27 ++++++++++++++++++--------- drivers/ide/ide-cd.h | 2 +- drivers/ide/ide-gd.c | 26 +++++++++++++++++--------- drivers/ide/ide-gd.h | 2 +- drivers/ide/ide-tape.c | 29 +++++++++++++++++++---------- include/linux/ide.h | 2 +- 6 files changed, 57 insertions(+), 31 deletions(-) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 690475b834de..ddfbea41d296 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -55,7 +55,7 @@ static DEFINE_MUTEX(idecd_ref_mutex); -static void ide_cd_release(struct kref *); +static void ide_cd_release(struct device *); static struct cdrom_info *ide_cd_get(struct gendisk *disk) { @@ -67,7 +67,7 @@ static struct cdrom_info *ide_cd_get(struct gendisk *disk) if (ide_device_get(cd->drive)) cd = NULL; else - kref_get(&cd->kref); + get_device(&cd->dev); } mutex_unlock(&idecd_ref_mutex); @@ -79,7 +79,7 @@ static void ide_cd_put(struct cdrom_info *cd) ide_drive_t *drive = cd->drive; mutex_lock(&idecd_ref_mutex); - kref_put(&cd->kref, ide_cd_release); 
+ put_device(&cd->dev); ide_device_put(drive); mutex_unlock(&idecd_ref_mutex); } @@ -1798,15 +1798,17 @@ static void ide_cd_remove(ide_drive_t *drive) ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); ide_proc_unregister_driver(drive, info->driver); - + device_del(&info->dev); del_gendisk(info->disk); - ide_cd_put(info); + mutex_lock(&idecd_ref_mutex); + put_device(&info->dev); + mutex_unlock(&idecd_ref_mutex); } -static void ide_cd_release(struct kref *kref) +static void ide_cd_release(struct device *dev) { - struct cdrom_info *info = to_ide_drv(kref, cdrom_info); + struct cdrom_info *info = to_ide_drv(dev, cdrom_info); struct cdrom_device_info *devinfo = &info->devinfo; ide_drive_t *drive = info->drive; struct gendisk *g = info->disk; @@ -2005,7 +2007,12 @@ static int ide_cd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&info->kref); + info->dev.parent = &drive->gendev; + info->dev.release = ide_cd_release; + dev_set_name(&info->dev, dev_name(&drive->gendev)); + + if (device_register(&info->dev)) + goto out_free_disk; info->drive = drive; info->driver = &ide_cdrom_driver; @@ -2019,7 +2026,7 @@ static int ide_cd_probe(ide_drive_t *drive) g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { - ide_cd_release(&info->kref); + put_device(&info->dev); goto failed; } @@ -2029,6 +2036,8 @@ static int ide_cd_probe(ide_drive_t *drive) add_disk(g); return 0; +out_free_disk: + put_disk(g); out_free_cd: kfree(info); failed: diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index ac40d6cb90a2..c878bfcf1116 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -80,7 +80,7 @@ struct cdrom_info { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* Buffer for table of contents. NULL if we haven't allocated a TOC buffer for this device yet. 
*/ diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 7857b209c6df..047109419902 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -25,7 +25,7 @@ module_param(debug_mask, ulong, 0644); static DEFINE_MUTEX(ide_disk_ref_mutex); -static void ide_disk_release(struct kref *); +static void ide_disk_release(struct device *); static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) { @@ -37,7 +37,7 @@ static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) if (ide_device_get(idkp->drive)) idkp = NULL; else - kref_get(&idkp->kref); + get_device(&idkp->dev); } mutex_unlock(&ide_disk_ref_mutex); return idkp; @@ -48,7 +48,7 @@ static void ide_disk_put(struct ide_disk_obj *idkp) ide_drive_t *drive = idkp->drive; mutex_lock(&ide_disk_ref_mutex); - kref_put(&idkp->kref, ide_disk_release); + put_device(&idkp->dev); ide_device_put(drive); mutex_unlock(&ide_disk_ref_mutex); } @@ -66,17 +66,18 @@ static void ide_gd_remove(ide_drive_t *drive) struct gendisk *g = idkp->disk; ide_proc_unregister_driver(drive, idkp->driver); - + device_del(&idkp->dev); del_gendisk(g); - drive->disk_ops->flush(drive); - ide_disk_put(idkp); + mutex_lock(&ide_disk_ref_mutex); + put_device(&idkp->dev); + mutex_unlock(&ide_disk_ref_mutex); } -static void ide_disk_release(struct kref *kref) +static void ide_disk_release(struct device *dev) { - struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj); + struct ide_disk_obj *idkp = to_ide_drv(dev, ide_disk_obj); ide_drive_t *drive = idkp->drive; struct gendisk *g = idkp->disk; @@ -348,7 +349,12 @@ static int ide_gd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&idkp->kref); + idkp->dev.parent = &drive->gendev; + idkp->dev.release = ide_disk_release; + dev_set_name(&idkp->dev, dev_name(&drive->gendev)); + + if (device_register(&idkp->dev)) + goto out_free_disk; idkp->drive = drive; idkp->driver = &ide_gd_driver; @@ -373,6 +379,8 @@ static int ide_gd_probe(ide_drive_t *drive) add_disk(g); return 0; 
+out_free_disk: + put_disk(g); out_free_idkp: kfree(idkp); failed: diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h index a86779f0756b..b604bdd318a1 100644 --- a/drivers/ide/ide-gd.h +++ b/drivers/ide/ide-gd.h @@ -17,7 +17,7 @@ struct ide_disk_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; unsigned int openers; /* protected by BKL for now */ /* Last failed packet command */ diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d7ecd3c79757..bb450a7608c2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -169,7 +169,7 @@ typedef struct ide_tape_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* * failed_pc points to the last failed packet command, or contains @@ -267,7 +267,7 @@ static DEFINE_MUTEX(idetape_ref_mutex); static struct class *idetape_sysfs_class; -static void ide_tape_release(struct kref *); +static void ide_tape_release(struct device *); static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) { @@ -279,7 +279,7 @@ static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) if (ide_device_get(tape->drive)) tape = NULL; else - kref_get(&tape->kref); + get_device(&tape->dev); } mutex_unlock(&idetape_ref_mutex); return tape; @@ -290,7 +290,7 @@ static void ide_tape_put(struct ide_tape_obj *tape) ide_drive_t *drive = tape->drive; mutex_lock(&idetape_ref_mutex); - kref_put(&tape->kref, ide_tape_release); + put_device(&tape->dev); ide_device_put(drive); mutex_unlock(&idetape_ref_mutex); } @@ -308,7 +308,7 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) mutex_lock(&idetape_ref_mutex); tape = idetape_devs[i]; if (tape) - kref_get(&tape->kref); + get_device(&tape->dev); mutex_unlock(&idetape_ref_mutex); return tape; } @@ -2256,15 +2256,17 @@ static void ide_tape_remove(ide_drive_t *drive) idetape_tape_t *tape = drive->driver_data; 
ide_proc_unregister_driver(drive, tape->driver); - + device_del(&tape->dev); ide_unregister_region(tape->disk); - ide_tape_put(tape); + mutex_lock(&idetape_ref_mutex); + put_device(&tape->dev); + mutex_unlock(&idetape_ref_mutex); } -static void ide_tape_release(struct kref *kref) +static void ide_tape_release(struct device *dev) { - struct ide_tape_obj *tape = to_ide_drv(kref, ide_tape_obj); + struct ide_tape_obj *tape = to_ide_drv(dev, ide_tape_obj); ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; @@ -2407,7 +2409,12 @@ static int ide_tape_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&tape->kref); + tape->dev.parent = &drive->gendev; + tape->dev.release = ide_tape_release; + dev_set_name(&tape->dev, dev_name(&drive->gendev)); + + if (device_register(&tape->dev)) + goto out_free_disk; tape->drive = drive; tape->driver = &idetape_driver; @@ -2436,6 +2443,8 @@ static int ide_tape_probe(ide_drive_t *drive) return 0; +out_free_disk: + put_disk(g); out_free_tape: kfree(tape); failed: diff --git a/include/linux/ide.h b/include/linux/ide.h index 194da5a4b0d6..fe235b65207e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -663,7 +663,7 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev) container_of(dev, ide_drive_t, gendev) #define to_ide_drv(obj, cont_type) \ - container_of(obj, struct cont_type, kref) + container_of(obj, struct cont_type, dev) #define ide_drv_g(disk, cont_type) \ container_of((disk)->private_data, struct cont_type, driver) From 6b1ff036d4cde7834ef2f9dbea5747adaaac24e0 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sat, 14 Feb 2009 04:11:29 -0500 Subject: [PATCH 33/49] [IA64] enable setting DMAR on by default The previous commit which introduced the DMAR_DEFAULT_ON setting in drivers/pci/dmar.c neglected to add the ability for ia64 to enable the IOMMU by default. Rectify that mistake, doh! 
Signed-off-by: Kyle McMartin Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 4eb45c012498..153e727a6e8e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -638,6 +638,17 @@ config DMAR and include PCI device scope covered by these DMA remapping devices. +config DMAR_DEFAULT_ON + def_bool y + prompt "Enable DMA Remapping Devices by default" + depends on DMAR + help + Selecting this option will enable a DMAR device at boot time if + one is found. If this option is not selected, DMAR support can + be enabled by passing intel_iommu=on to the kernel. It is + recommended you say N here while the DMAR code remains + experimental. + endmenu endif From aa2f63c95439a11dfac35c60d9160dcd0189aed3 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 22 Feb 2009 02:33:28 +0100 Subject: [PATCH 34/49] [IA64] Do not go beyond ARRAY_SIZE of unw.hash static struct { ... :114 unsigned short hash[UNW_HASH_SIZE]; ... :2152 for (index = 0; index <= UNW_HASH_SIZE; ++index) { This is a bug, isn't it? 
s/<=/</ Signed-off-by: Tony Luck --- arch/ia64/kernel/unwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 67810b77d998..b6c0e63a0bf6 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -2149,7 +2149,7 @@ unw_remove_unwind_table (void *handle) /* next, remove hash table entries for this table */ - for (index = 0; index <= UNW_HASH_SIZE; ++index) { + for (index = 0; index < UNW_HASH_SIZE; ++index) { tmp = unw.cache + unw.hash[index]; if (unw.hash[index] >= UNW_CACHE_SIZE || tmp->ip < table->start || tmp->ip >= table->end) From 5b5923975f07836fc7a5388f9fa5f459828ae4ee Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 21 Feb 2009 23:40:27 +0100 Subject: [PATCH 35/49] [IA64] Don't go beyond iosapic_intr_info's arraysize vi arch/ia64/kernel/iosapic.c +142 static struct iosapic_intr_info { ... } iosapic_intr_info[NR_IRQS]; But at line 510 we have: for (i = 0; i <= NR_IRQS; i++) { s/<=/</ Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5cfd3d91001a..e13125058bed 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -507,7 +507,7 @@ static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) if (trigger == IOSAPIC_EDGE) return -EINVAL; - for (i = 0; i <= NR_IRQS; i++) { + for (i = 0; i < NR_IRQS; i++) { info = &iosapic_intr_info[i]; if (info->trigger == trigger && info->polarity == pol && (info->dmode == IOSAPIC_FIXED || From 0b0a0806b0d8635e046bf533225a25903b1cddce Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 24 Feb 2009 20:51:52 +0000 Subject: [PATCH 36/49] shmem: fix shared anonymous accounting Each time I exit Firefox, /proc/meminfo's Committed_AS goes down almost 400 kB: OVERCOMMIT_NEVER would be allowing overcommits it should prohibit. 
Commit fc8744adc870a8d4366908221508bb113d8b72ee "Stop playing silly games with the VM_ACCOUNT flag" changed shmem_file_setup() to set the shmem file's VM_ACCOUNT flag according to VM_NORESERVE not being set in the vma flags; but did so only _after_ the shmem_acct_size(flags, size) call which is expected to pre-account a shared anonymous object. It's all clearer if we switch shmem.c over to use VM_NORESERVE throughout in place of !VM_ACCOUNT. But I very nearly sent in a patch which mistakenly removed the accounting from tmpfs files: shmem_get_inode()'s memset was good for not setting VM_ACCOUNT, but now it needs to set VM_NORESERVE. Rather than setting that by default, then perhaps clearing it again in shmem_file_setup(), let's pass it as a flag to shmem_get_inode(): that allows us to remove the #ifdef CONFIG_SHMEM from shmem_file_setup(). Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/shmem.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 19d566ccdeea..4103a239ce84 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -169,13 +169,13 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) */ static inline int shmem_acct_size(unsigned long flags, loff_t size) { - return (flags & VM_ACCOUNT) ? - security_vm_enough_memory_kern(VM_ACCT(size)) : 0; + return (flags & VM_NORESERVE) ? + 0 : security_vm_enough_memory_kern(VM_ACCT(size)); } static inline void shmem_unacct_size(unsigned long flags, loff_t size) { - if (flags & VM_ACCOUNT) + if (!(flags & VM_NORESERVE)) vm_unacct_memory(VM_ACCT(size)); } @@ -187,13 +187,13 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size) */ static inline int shmem_acct_block(unsigned long flags) { - return (flags & VM_ACCOUNT) ? - 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); + return (flags & VM_NORESERVE) ? 
+ security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0; } static inline void shmem_unacct_blocks(unsigned long flags, long pages) { - if (!(flags & VM_ACCOUNT)) + if (flags & VM_NORESERVE) vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); } @@ -1515,8 +1515,8 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static struct inode * -shmem_get_inode(struct super_block *sb, int mode, dev_t dev) +static struct inode *shmem_get_inode(struct super_block *sb, int mode, + dev_t dev, unsigned long flags) { struct inode *inode; struct shmem_inode_info *info; @@ -1537,6 +1537,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); switch (mode & S_IFMT) { @@ -1779,9 +1780,10 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); + struct inode *inode; int error = -ENOSPC; + inode = shmem_get_inode(dir->i_sb, mode, dev, VM_NORESERVE); if (inode) { error = security_inode_init_security(inode, dir, NULL, NULL, NULL); @@ -1920,7 +1922,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s if (len > PAGE_CACHE_SIZE) return -ENAMETOOLONG; - inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); if (!inode) return -ENOSPC; @@ -2332,7 +2334,7 @@ static int shmem_fill_super(struct super_block *sb, sb->s_flags |= MS_POSIXACL; #endif - inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0); + inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); if (!inode) goto failed; inode->i_uid = sbinfo->uid; @@ -2574,12 +2576,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page) return 
0; } -#define shmem_file_operations ramfs_file_operations -#define shmem_vm_ops generic_file_vm_ops -#define shmem_get_inode ramfs_get_inode -#define shmem_acct_size(a, b) 0 -#define shmem_unacct_size(a, b) do {} while (0) -#define SHMEM_MAX_BYTES LLONG_MAX +#define shmem_vm_ops generic_file_vm_ops +#define shmem_file_operations ramfs_file_operations +#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) +#define shmem_acct_size(flags, size) 0 +#define shmem_unacct_size(flags, size) do {} while (0) +#define SHMEM_MAX_BYTES LLONG_MAX #endif /* CONFIG_SHMEM */ @@ -2589,7 +2591,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) * shmem_file_setup - get an unlinked file living in tmpfs * @name: name for dentry (to be seen in /proc//maps * @size: size to be set for the file - * @flags: vm_flags + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) { @@ -2623,13 +2625,10 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) goto put_dentry; error = -ENOSPC; - inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); + inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags); if (!inode) goto close_file; -#ifdef CONFIG_SHMEM - SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT; -#endif d_instantiate(dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ From 7ba07d16bd62f931efec1fc8e63bf1aeebfe42a9 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 11 Feb 2009 13:08:43 -0800 Subject: [PATCH 37/49] pata_it821x: resume from hibernation fails with RAID volume Hibernation didn't work for me since I started to use IT8212 controller. I did some debugging (booting with no_console_suspend init=/bin/sh). Found that resume fails (2.6.28) with "serial number mismatch 'some garbage' != 'some other garbage'" and "revalidation failed" messages. 
That's because the controller firmware fills different serial number in the IDENTIFY every boot. The patch below fixes the resume simply clearing the serial number. The proper fix would be probably to fill in the serial number of the RAID volume instead. I assume that there must be something like that stored on the drives but I don't know where. Fix resume on pata_it821x RAID volume by clearing the serial number in IDENTIFY data, which is otherwise different on each boot. Signed-off-by: Ondrej Zary Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/ata/pata_it821x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index f1bb2f9fecbf..b05b86a912c5 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -557,6 +557,9 @@ static unsigned int it821x_read_id(struct ata_device *adev, id[83] |= 0x4400; /* Word 83 is valid and LBA48 */ id[86] |= 0x0400; /* LBA48 on */ id[ATA_ID_MAJOR_VER] |= 0x1F; + /* Clear the serial number because it's different each boot + which breaks validation on resume */ + memset(&id[ATA_ID_SERNO], 0x20, ATA_ID_SERNO_LEN); } return err_mask; } From 6be96ac15e4d913e1f48299db083ada5321803b2 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Thu, 19 Feb 2009 10:38:04 -0500 Subject: [PATCH 38/49] sata_mv: fix SoC interrupt breakage For some reason, sata_mv doesn't clear interrupt status during init when it's running on an SoC host adapter. If the bootloader has touched the SATA controller before starting Linux, Linux can end up enabling the SATA interrupt with events pending, which will cause the interrupt to be marked as spurious and then be disabled, which then breaks all further accesses to the controller. This patch makes the SoC path clear interrupt status on init like in the non-SoC case. 
Signed-off-by: Lennert Buytenhek Signed-off-by: Mark Lord Signed-off-by: Jeff Garzik --- drivers/ata/sata_mv.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 4ae1a4138b47..7007edd2d451 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3114,19 +3114,17 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) writelfl(0, hc_mmio + HC_IRQ_CAUSE_OFS); } - if (!IS_SOC(hpriv)) { - /* Clear any currently outstanding host interrupt conditions */ - writelfl(0, mmio + hpriv->irq_cause_ofs); + /* Clear any currently outstanding host interrupt conditions */ + writelfl(0, mmio + hpriv->irq_cause_ofs); - /* and unmask interrupt generation for host regs */ - writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs); + /* and unmask interrupt generation for host regs */ + writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs); - /* - * enable only global host interrupts for now. - * The per-port interrupts get done later as ports are set up. - */ - mv_set_main_irq_mask(host, 0, PCI_ERR); - } + /* + * enable only global host interrupts for now. + * The per-port interrupts get done later as ports are set up. + */ + mv_set_main_irq_mask(host, 0, PCI_ERR); done: return rc; } From c48052cc36e02fff6a9bb3cf83c4206b9127611f Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 11 Feb 2009 13:08:41 -0800 Subject: [PATCH 39/49] [libata] pata_amd: program FIFO With 32bit PIO we can use the posted write buffers, but only for 32bit I/O cycles. This means we must disable the FIFO for ATAPI where a final 16bit cycle may occur. Rework the FIFO logic so that we disable the FIFO then selectively re-enable it when we set the timings on AMD devices. Also fix a case where we scribbled on PCI config 0x41 of Nvidia chips when we shouldn't. 
Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/ata/pata_amd.c | 76 ++++++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index 63719ab9ea44..115b1cd6dcf5 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -24,7 +24,7 @@ #include #define DRV_NAME "pata_amd" -#define DRV_VERSION "0.3.11" +#define DRV_VERSION "0.4.1" /** * timing_setup - shared timing computation and load @@ -145,6 +145,13 @@ static int amd_pre_reset(struct ata_link *link, unsigned long deadline) return ata_sff_prereset(link, deadline); } +/** + * amd_cable_detect - report cable type + * @ap: port + * + * AMD controller/BIOS setups record the cable type in word 0x42 + */ + static int amd_cable_detect(struct ata_port *ap) { static const u32 bitmask[2] = {0x03, 0x0C}; @@ -157,6 +164,40 @@ static int amd_cable_detect(struct ata_port *ap) return ATA_CBL_PATA40; } +/** + * amd_fifo_setup - set the PIO FIFO for ATA/ATAPI + * @ap: ATA interface + * @adev: ATA device + * + * Set the PCI fifo for this device according to the devices present + * on the bus at this point in time. 
We need to turn the post write buffer + * off for ATAPI devices as we may need to issue a word sized write to the + * device as the final I/O + */ + +static void amd_fifo_setup(struct ata_port *ap) +{ + struct ata_device *adev; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + static const u8 fifobit[2] = { 0xC0, 0x30}; + u8 fifo = fifobit[ap->port_no]; + u8 r; + + + ata_for_each_dev(adev, &ap->link, ENABLED) { + if (adev->class == ATA_DEV_ATAPI) + fifo = 0; + } + if (pdev->device == PCI_DEVICE_ID_AMD_VIPER_7411) /* FIFO is broken */ + fifo = 0; + + /* On the later chips the read prefetch bits become no-op bits */ + pci_read_config_byte(pdev, 0x41, &r); + r &= ~fifobit[ap->port_no]; + r |= fifo; + pci_write_config_byte(pdev, 0x41, r); +} + /** * amd33_set_piomode - set initial PIO mode data * @ap: ATA interface @@ -167,21 +208,25 @@ static int amd_cable_detect(struct ata_port *ap) static void amd33_set_piomode(struct ata_port *ap, struct ata_device *adev) { + amd_fifo_setup(ap); timing_setup(ap, adev, 0x40, adev->pio_mode, 1); } static void amd66_set_piomode(struct ata_port *ap, struct ata_device *adev) { + amd_fifo_setup(ap); timing_setup(ap, adev, 0x40, adev->pio_mode, 2); } static void amd100_set_piomode(struct ata_port *ap, struct ata_device *adev) { + amd_fifo_setup(ap); timing_setup(ap, adev, 0x40, adev->pio_mode, 3); } static void amd133_set_piomode(struct ata_port *ap, struct ata_device *adev) { + amd_fifo_setup(ap); timing_setup(ap, adev, 0x40, adev->pio_mode, 4); } @@ -397,6 +442,16 @@ static struct ata_port_operations nv133_port_ops = { .set_dmamode = nv133_set_dmamode, }; +static void amd_clear_fifo(struct pci_dev *pdev) +{ + u8 fifo; + /* Disable the FIFO, the FIFO logic will re-enable it as + appropriate */ + pci_read_config_byte(pdev, 0x41, &fifo); + fifo &= 0x0F; + pci_write_config_byte(pdev, 0x41, fifo); +} + static int amd_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { static const struct ata_port_info info[10] = { @@ -503,14 
+558,8 @@ static int amd_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (type < 3) ata_pci_bmdma_clear_simplex(pdev); - - /* Check for AMD7411 */ - if (type == 3) - /* FIFO is broken */ - pci_write_config_byte(pdev, 0x41, fifo & 0x0F); - else - pci_write_config_byte(pdev, 0x41, fifo | 0xF0); - + if (pdev->vendor == PCI_VENDOR_ID_AMD) + amd_clear_fifo(pdev); /* Cable detection on Nvidia chips doesn't work too well, * cache BIOS programmed UDMA mode. */ @@ -536,18 +585,11 @@ static int amd_reinit_one(struct pci_dev *pdev) return rc; if (pdev->vendor == PCI_VENDOR_ID_AMD) { - u8 fifo; - pci_read_config_byte(pdev, 0x41, &fifo); - if (pdev->device == PCI_DEVICE_ID_AMD_VIPER_7411) - /* FIFO is broken */ - pci_write_config_byte(pdev, 0x41, fifo & 0x0F); - else - pci_write_config_byte(pdev, 0x41, fifo | 0xF0); + amd_clear_fifo(pdev); if (pdev->device == PCI_DEVICE_ID_AMD_VIPER_7409 || pdev->device == PCI_DEVICE_ID_AMD_COBRA_7401) ata_pci_bmdma_clear_simplex(pdev); } - ata_host_resume(host); return 0; } From c55af1f5abf606118b32e3ce9c3b1bbce5236e7e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 11 Feb 2009 13:08:42 -0800 Subject: [PATCH 40/49] [libata] pata_legacy: for VLB 32bit PIO don't try tricks with slop These devices are generally used with ATA anyway and it seems that some ATAPI will need us to issue the right number of words. Therefore as we can't switch mid burst on VLB devices we should only use 32bit I/O for suitable block sizes. 
Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/ata/pata_legacy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 6c1d778b63a9..e3bc1b436284 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -283,9 +283,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) static unsigned int pdc_data_xfer_vlb(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw) { - if (ata_id_has_dword_io(dev->id)) { + int slop = buflen & 3; + /* 32bit I/O capable *and* we need to write a whole number of dwords */ + if (ata_id_has_dword_io(dev->id) && (slop == 0 || slop == 3)) { struct ata_port *ap = dev->link->ap; - int slop = buflen & 3; unsigned long flags; local_irq_save(flags); @@ -735,7 +736,7 @@ static unsigned int vlb32_data_xfer(struct ata_device *adev, unsigned char *buf, struct ata_port *ap = adev->link->ap; int slop = buflen & 3; - if (ata_id_has_dword_io(adev->id)) { + if (ata_id_has_dword_io(adev->id) && (slop == 0 || slop == 3)) { if (rw == WRITE) iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); else From 49f297f8df9adb797334155470ea9ca68bdb041e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 19 Feb 2009 18:52:20 +0000 Subject: [PATCH 41/49] powerpc: Fix load/store float double alignment handler When we introduced VSX, we changed the way FPRs are stored in the thread_struct. Unfortunately we missed the load/store float double alignment handler code when updating how we access FPRs in the thread_struct. Below fixes this and merges the little/big endian case. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/align.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index ada06924a423..73cb6a3229ae 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -367,27 +367,24 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, unsigned int flags) { - char *ptr = (char *) &current->thread.TS_FPR(reg); - int i, ret; + char *ptr0 = (char *) &current->thread.TS_FPR(reg); + char *ptr1 = (char *) &current->thread.TS_FPR(reg+1); + int i, ret, sw = 0; if (!(flags & F)) return 0; if (reg & 1) return 0; /* invalid form: FRS/FRT must be even */ - if (!(flags & SW)) { - /* not byte-swapped - easy */ - if (!(flags & ST)) - ret = __copy_from_user(ptr, addr, 16); - else - ret = __copy_to_user(addr, ptr, 16); - } else { - /* each FPR value is byte-swapped separately */ - ret = 0; - for (i = 0; i < 16; ++i) { - if (!(flags & ST)) - ret |= __get_user(ptr[i^7], addr + i); - else - ret |= __put_user(ptr[i^7], addr + i); + if (flags & SW) + sw = 7; + ret = 0; + for (i = 0; i < 8; ++i) { + if (!(flags & ST)) { + ret |= __get_user(ptr0[i^sw], addr + i); + ret |= __get_user(ptr1[i^sw], addr + i + 8); + } else { + ret |= __put_user(ptr0[i^sw], addr + i); + ret |= __put_user(ptr1[i^sw], addr + i + 8); } } if (ret) From e423b9ecd6aa434ce9ba72a21fdc61079e620e0a Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Wed, 25 Feb 2009 13:26:48 +0000 Subject: [PATCH 42/49] powerpc: Fix 64bit memcpy() regression This fixes a regression introduced by commit 25d6e2d7c58ddc4a3b614fc5381591c0cfe66556 ("powerpc: Update 64bit memcpy() using CPU_FTR_UNALIGNED_LD_STD"). This commit allowed CPUs that have the CPU_FTR_UNALIGNED_LD_STD CPU feature bit present to do the memcpy() with unaligned load doubles.
But, along with this came a bug where our final load double would read bytes beyond a page boundary and into the next (unmapped) page. This was caught by enabling CONFIG_DEBUG_PAGEALLOC. The fix was to read only the number of bytes that we need to store rather than reading a full 8-byte doubleword and storing only a portion of that. In order to minimise the amount of existing code touched we use the original do_tail for the src_unaligned case. Below is an example of the regression, as reported by Sachin Sant: Unable to handle kernel paging request for data at address 0xc00000003f380000 Faulting instruction address: 0xc000000000039574 cpu 0x1: Vector: 300 (Data Access) at [c00000003baf3020] pc: c000000000039574: .memcpy+0x74/0x244 lr: d00000000244916c: .ext3_xattr_get+0x288/0x2f4 [ext3] sp: c00000003baf32a0 msr: 8000000000009032 dar: c00000003f380000 dsisr: 40000000 current = 0xc00000003e54b010 paca = 0xc000000000a53680 pid = 1840, comm = readahead enter ? for help [link register ] d00000000244916c .ext3_xattr_get+0x288/0x2f4 [ext3] [c00000003baf32a0] d000000002449104 .ext3_xattr_get+0x220/0x2f4 [ext3] (unreliable) [c00000003baf3390] d00000000244a6e8 .ext3_xattr_security_get+0x40/0x5c [ext3] [c00000003baf3400] c000000000148154 .generic_getxattr+0x74/0x9c [c00000003baf34a0] c000000000333400 .inode_doinit_with_dentry+0x1c4/0x678 [c00000003baf3560] c00000000032c6b0 .security_d_instantiate+0x50/0x68 [c00000003baf35e0] c00000000013c818 .d_instantiate+0x78/0x9c [c00000003baf3680] c00000000013ced0 .d_splice_alias+0xf0/0x120 [c00000003baf3720] d00000000243e05c .ext3_lookup+0xec/0x134 [ext3] [c00000003baf37c0] c000000000131e74 .do_lookup+0x110/0x260 [c00000003baf3880] c000000000134ed0 .__link_path_walk+0xa98/0x1010 [c00000003baf3970] c0000000001354a0 .path_walk+0x58/0xc4 [c00000003baf3a20] c000000000135720 .do_path_lookup+0x138/0x1e4 [c00000003baf3ad0] c00000000013645c .path_lookup_open+0x6c/0xc8 [c00000003baf3b70] c000000000136780 .do_filp_open+0xcc/0x874
[c00000003baf3d10] c0000000001251e0 .do_sys_open+0x80/0x140 [c00000003baf3dc0] c00000000016aaec .compat_sys_open+0x24/0x38 [c00000003baf3e30] c00000000000855c syscall_exit+0x0/0x40 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/memcpy_64.S | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index fe2d34e5332d..e178922b2c21 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -53,18 +53,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: std r8,8(r3) beq 3f addi r3,r3,16 - ld r9,8(r4) .Ldo_tail: bf cr7*4+1,1f - rotldi r9,r9,32 + lwz r9,8(r4) + addi r4,r4,4 stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f - rotldi r9,r9,16 + lhz r9,8(r4) + addi r4,r4,2 sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f - rotldi r9,r9,8 + lbz r9,8(r4) stb r9,0(r3) 3: ld r3,48(r1) /* return dest pointer */ blr @@ -133,11 +134,24 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) cmpwi cr1,r5,8 addi r3,r3,32 sld r9,r9,r10 - ble cr1,.Ldo_tail + ble cr1,6f ld r0,8(r4) srd r7,r0,r11 or r9,r7,r9 - b .Ldo_tail +6: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: ld r3,48(r1) /* return dest pointer */ + blr .Ldst_unaligned: PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7 From f72b728bf100f276628e378e1fe6c6acd5d09401 Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Wed, 25 Feb 2009 13:46:24 +0000 Subject: [PATCH 43/49] powerpc: Fix 64bit __copy_tofrom_user() regression This fixes a regression introduced by commit a4e22f02f5b6518c1484faea1f88d81802b9feac ("powerpc: Update 64bit __copy_tofrom_user() using CPU_FTR_UNALIGNED_LD_STD"). The same bug that existed in the 64bit memcpy() also exists here so fix it here too. 
The fix is the same as that applied to memcpy() with the addition of fixes for the exception handling code required for __copy_tofrom_user(). This stops us reading beyond the end of the source region we were told to copy. Signed-off-by: Mark Nelson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/copyuser_64.S | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 70693a5c12a1..693b14a778fa 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -62,18 +62,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 72: std r8,8(r3) beq+ 3f addi r3,r3,16 -23: ld r9,8(r4) .Ldo_tail: bf cr7*4+1,1f - rotldi r9,r9,32 +23: lwz r9,8(r4) + addi r4,r4,4 73: stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f - rotldi r9,r9,16 +44: lhz r9,8(r4) + addi r4,r4,2 74: sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f - rotldi r9,r9,8 +45: lbz r9,8(r4) 75: stb r9,0(r3) 3: li r3,0 blr @@ -141,11 +142,24 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 6: cmpwi cr1,r5,8 addi r3,r3,32 sld r9,r9,r10 - ble cr1,.Ldo_tail + ble cr1,7f 34: ld r0,8(r4) srd r7,r0,r11 or r9,r7,r9 - b .Ldo_tail +7: + bf cr7*4+1,1f + rotldi r9,r9,32 +94: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +95: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +96: stb r9,0(r3) +3: li r3,0 + blr .Ldst_unaligned: PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */ @@ -218,7 +232,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 121: 132: addi r3,r3,8 -123: 134: 135: 138: @@ -226,6 +239,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 140: 141: 142: +123: +144: +145: /* * here we have had a fault on a load and r3 points to the first @@ -309,6 +325,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 187: 188: 189: +194: +195: +196: 1: ld r6,-24(r1) ld r5,-8(r1) @@ -329,7 +348,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) .llong 72b,172b 
.llong 23b,123b .llong 73b,173b + .llong 44b,144b .llong 74b,174b + .llong 45b,145b .llong 75b,175b .llong 24b,124b .llong 25b,125b @@ -347,6 +368,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) .llong 79b,179b .llong 80b,180b .llong 34b,134b + .llong 94b,194b + .llong 95b,195b + .llong 96b,196b .llong 35b,135b .llong 81b,181b .llong 36b,136b From b2bf96833c5782befc3e7700f791fde754a47b01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Feb 2009 08:50:26 +0100 Subject: [PATCH 44/49] block: fix bogus gcc warning for uninitialized var usage Newer gcc throw this warning: fs/bio.c: In function 'bio_alloc_bioset': fs/bio.c:305: warning: 'p' may be used uninitialized in this function since it cannot figure out that 'p' is only ever used if 'bs' is non-NULL. Signed-off-by: Jens Axboe --- fs/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bio.c b/fs/bio.c index 72ab251cdb9c..124b95c4d582 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -302,7 +302,7 @@ void bio_init(struct bio *bio) struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = NULL; - void *p; + void *uninitialized_var(p); if (bs) { p = mempool_alloc(bs->bio_pool, gfp_mask); From 9e8c0bccdc944bd09361672d47660810c027bcaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20N=C3=A9meth?= Date: Fri, 20 Feb 2009 08:12:51 +0100 Subject: [PATCH 45/49] block: add documentation for register_blkdev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for register_blkdev() function and for the parameters.
Signed-off-by: Márton Németh Cc: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- block/genhd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index e1eadcc9546a..a9ec910974c1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -256,6 +256,22 @@ void blkdev_show(struct seq_file *seqf, off_t offset) } #endif /* CONFIG_PROC_FS */ +/** + * register_blkdev - register a new block device + * + * @major: the requested major device number [1..255]. If @major=0, try to + * allocate any unused major number. + * @name: the name of the new block device as a zero terminated string + * + * The @name must be unique within the system. + * + * The return value depends on the @major input parameter. + * - if a major device number was requested in range [1..255] then the + * function returns zero on success, or a negative error code + * - if any unused major number was requested with @major=0 parameter + * then the return value is the allocated major number in range + * [1..255] or a negative error code otherwise + */ int register_blkdev(unsigned int major, const char *name) { struct blk_major_name **n, *p; From 5e4c91c84b194b26cf592779e451f4b5be777cba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 08:53:35 +0100 Subject: [PATCH 46/49] cciss: shorten 30s timeout on controller reset If reset_devices is set for kexec, then cciss will delay 30 seconds since the old 5i controller _may_ need that long to recover. Replace the long sleep with incremental sleep and tests to reduce the 30 seconds to worst case for 5i, so that other controllers will proceed quickly. 
Reviewed-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d2cb67b61176..b5a061114630 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3611,11 +3611,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, schedule_timeout_uninterruptible(30*HZ); /* Now try to get the controller to respond to a no-op */ - for (i=0; i<12; i++) { + for (i=0; i<30; i++) { if (cciss_noop(pdev) == 0) break; - else - printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); + + schedule_timeout_uninterruptible(HZ); + } + if (i == 30) { + printk(KERN_ERR "cciss: controller seems dead\n"); + return -EBUSY; } } From 1e42807918d17e8c93bf14fbb74be84b141334c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 09:03:10 +0100 Subject: [PATCH 47/49] block: reduce stack footprint of blk_recount_segments() blk_recalc_rq_segments() requires a request structure passed in, which we don't have from blk_recount_segments(). So the latter allocates one on the stack, using > 400 bytes of stack for that. 
This can cause us to spill over one page of stack from ext4 at least: 0) 4560 400 blk_recount_segments+0x43/0x62 1) 4160 32 bio_phys_segments+0x1c/0x24 2) 4128 32 blk_rq_bio_prep+0x2a/0xf9 3) 4096 32 init_request_from_bio+0xf9/0xfe 4) 4064 112 __make_request+0x33c/0x3f6 5) 3952 144 generic_make_request+0x2d1/0x321 6) 3808 64 submit_bio+0xb9/0xc3 7) 3744 48 submit_bh+0xea/0x10e 8) 3696 368 ext4_mb_init_cache+0x257/0xa6a [ext4] 9) 3328 288 ext4_mb_regular_allocator+0x421/0xcd9 [ext4] 10) 3040 160 ext4_mb_new_blocks+0x211/0x4b4 [ext4] 11) 2880 336 ext4_ext_get_blocks+0xb61/0xd45 [ext4] 12) 2544 96 ext4_get_blocks_wrap+0xf2/0x200 [ext4] 13) 2448 80 ext4_da_get_block_write+0x6e/0x16b [ext4] 14) 2368 352 mpage_da_map_blocks+0x7e/0x4b3 [ext4] 15) 2016 352 ext4_da_writepages+0x2ce/0x43c [ext4] 16) 1664 32 do_writepages+0x2d/0x3c 17) 1632 144 __writeback_single_inode+0x162/0x2cd 18) 1488 96 generic_sync_sb_inodes+0x1e3/0x32b 19) 1392 16 sync_sb_inodes+0xe/0x10 20) 1376 48 writeback_inodes+0x69/0xb3 21) 1328 208 balance_dirty_pages_ratelimited_nr+0x187/0x2f9 22) 1120 224 generic_file_buffered_write+0x1d4/0x2c4 23) 896 176 __generic_file_aio_write_nolock+0x35f/0x393 24) 720 80 generic_file_aio_write+0x6c/0xc8 25) 640 80 ext4_file_write+0xa9/0x137 [ext4] 26) 560 320 do_sync_write+0xf0/0x137 27) 240 48 vfs_write+0xb3/0x13c 28) 192 64 sys_write+0x4c/0x74 29) 128 128 system_call_fastpath+0x16/0x1b Split the segment counting out into a __blk_recalc_rq_segments() helper to avoid allocating an onstack request just for checking the physical segment count. 
Signed-off-by: Jens Axboe --- block/blk-merge.c | 94 ++++++++++++++++++++++++------------------ include/linux/blkdev.h | 2 + 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index b92f5b0866b0..a104593e70c3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -38,72 +38,84 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) } } -void blk_recalc_rq_segments(struct request *rq) +static unsigned int __blk_recalc_rq_segments(struct request_queue *q, + struct bio *bio, + unsigned int *seg_size_ptr) { - int nr_phys_segs; unsigned int phys_size; struct bio_vec *bv, *bvprv = NULL; - int seg_size; - int cluster; - struct req_iterator iter; - int high, highprv = 1; - struct request_queue *q = rq->q; + int cluster, i, high, highprv = 1; + unsigned int seg_size, nr_phys_segs; + struct bio *fbio; - if (!rq->bio) - return; + if (!bio) + return 0; + fbio = bio; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); seg_size = 0; phys_size = nr_phys_segs = 0; - rq_for_each_segment(bv, rq, iter) { - /* - * the trick here is making sure that a high page is never - * considered part of another segment, since that might - * change with the bounce page. - */ - high = page_to_pfn(bv->bv_page) > q->bounce_pfn; - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len > q->max_segment_size) - goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + for_each_bio(bio) { + bio_for_each_segment(bv, bio, i) { + /* + * the trick here is making sure that a high page is + * never considered part of another segment, since that + * might change with the bounce page. 
+ */ + high = page_to_pfn(bv->bv_page) > q->bounce_pfn; + if (high || highprv) goto new_segment; + if (cluster) { + if (seg_size + bv->bv_len > q->max_segment_size) + goto new_segment; + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + goto new_segment; - seg_size += bv->bv_len; - bvprv = bv; - continue; - } + seg_size += bv->bv_len; + bvprv = bv; + continue; + } new_segment: - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) - rq->bio->bi_seg_front_size = seg_size; + if (nr_phys_segs == 1 && seg_size > + fbio->bi_seg_front_size) + fbio->bi_seg_front_size = seg_size; - nr_phys_segs++; - bvprv = bv; - seg_size = bv->bv_len; - highprv = high; + nr_phys_segs++; + bvprv = bv; + seg_size = bv->bv_len; + highprv = high; + } } - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + if (seg_size_ptr) + *seg_size_ptr = seg_size; + + return nr_phys_segs; +} + +void blk_recalc_rq_segments(struct request *rq) +{ + unsigned int seg_size = 0, phys_segs; + + phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size); + + if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) rq->bio->bi_seg_front_size = seg_size; if (seg_size > rq->biotail->bi_seg_back_size) rq->biotail->bi_seg_back_size = seg_size; - rq->nr_phys_segments = nr_phys_segs; + rq->nr_phys_segments = phys_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - struct request rq; struct bio *nxt = bio->bi_next; - rq.q = q; - rq.bio = rq.biotail = bio; + bio->bi_next = NULL; - blk_recalc_rq_segments(&rq); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL); bio->bi_next = nxt; - bio->bi_phys_segments = rq.nr_phys_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dcaa0fd84b02..465d6babc847 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -708,6 +708,8 @@ struct 
req_iterator { }; /* This should not be used directly - use rq_for_each_segment */ +#define for_each_bio(_bio) \ + for (; _bio; _bio = _bio->bi_next) #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) From 9e973e64ac6dc504e6447d52193d4fff1a670156 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Feb 2009 08:10:09 +0100 Subject: [PATCH 48/49] xen/blkfront: use blk_rq_map_sg to generate ring entries On occasion, the request will apparently have more segments than we fit into the ring. Jens says: > The second problem is that the block layer then appears to create one > too many segments, but from the dump it has rq->nr_phys_segments == > BLKIF_MAX_SEGMENTS_PER_REQUEST. I suspect the latter is due to > xen-blkfront not handling the merging on its own. It should check that > the new page doesn't form part of the previous page. The > rq_for_each_segment() iterates all single bits in the request, not dma > segments. The "easiest" way to do this is to call blk_rq_map_sg() and > then iterate the mapped sg list. That will give you what you are > looking for. > Here's a test patch, compiles but otherwise untested. I spent more > time figuring out how to enable XEN than to code it up, so YMMV! > Probably the sg list wants to be put inside the ring and only > initialized on allocation, then you can get rid of the sg on stack and > sg_init_table() loop call in the function. I'll leave that, and the > testing, to you. [Moved sg array into info structure, and initialize once. 
-J] Signed-off-by: Jens Axboe Signed-off-by: Jeremy Fitzhardinge --- drivers/block/xen-blkfront.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 918ef725de41..b6c8ce254359 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -82,6 +83,7 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; + struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -204,12 +206,11 @@ static int blkif_queue_request(struct request *req) struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_mfn; struct blkif_request *ring_req; - struct req_iterator iter; - struct bio_vec *bvec; unsigned long id; unsigned int fsect, lsect; - int ref; + int i, ref; grant_ref_t gref_head; + struct scatterlist *sg; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; @@ -238,12 +239,13 @@ static int blkif_queue_request(struct request *req) if (blk_barrier_rq(req)) ring_req->operation = BLKIF_OP_WRITE_BARRIER; - ring_req->nr_segments = 0; - rq_for_each_segment(bvec, req, iter) { - BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST); - buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; + ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); + fsect = sg->offset >> 9; + lsect = fsect + (sg->length >> 9) - 1; /* install a grant reference. 
*/ ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); @@ -254,16 +256,12 @@ static int blkif_queue_request(struct request *req) buffer_mfn, rq_data_dir(req) ); - info->shadow[id].frame[ring_req->nr_segments] = - mfn_to_pfn(buffer_mfn); - - ring_req->seg[ring_req->nr_segments] = + info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); + ring_req->seg[i] = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; - - ring_req->nr_segments++; } info->ring.req_prod_pvt++; @@ -622,6 +620,8 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); From 86883c2736e9697a38080a31c2794fa1316fd68f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Feb 2009 10:32:31 -0800 Subject: [PATCH 49/49] Make ieee1394_init a fs-initcall It needs to happen before any firewire driver actually registers itself, and that was previously handled by having the Makefile list the core ieee1394 files before the drivers. But now there are firewire drivers in drivers/media, and the Makefile games aren't enough. So just make ieee1394_init happen earlier in the init sequence, the way all other bus layers already do. 
Reported-and-tested-by: Ingo Molnar Cc: Stefan Richter Cc: Henrik Kurelid Cc: Mauro Carvalho Chehab Cc: Ben Backx Signed-off-by: Linus Torvalds --- drivers/ieee1394/ieee1394_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index 1028e725a27e..872338003721 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -1275,7 +1275,7 @@ static void __exit ieee1394_cleanup(void) unregister_chrdev_region(IEEE1394_CORE_DEV, 256); } -module_init(ieee1394_init); +fs_initcall(ieee1394_init); module_exit(ieee1394_cleanup); /* Exported symbols */