3389e56d31
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl5Cn0wACgkQONu9yGCS aT584xAAtePSlzTxst/jukREoyrpAfTM1BeovMdsZEBpKh+/F3n1udqHeo+iNAAN qSOig012aW2qP7b5/4CrEU9ZRTvd0AM4fog7ABLJVahMYMqoJgod8TRaE4v0nVut eRans6w3NbZJCZwdw2aiu5gwFfjwJLSUckBNmj4XVYdyfh7q0BgnZV5OY0V+zhuG 1MWXaylbRqjguR/ZFk0UPAmRaqNKHbwfCJ1V0ygL9xQkJM0cUn7hX9/CqM4aYnm6 m1oux4ektLAmF1XK4NiQEuRBMeFO74XlKcsZqQHf/b4FZfcPergcPwIj8ugtCHzJ kx2QgURDjgH4Tnu+Q0ScPrjj2kjU8rWmjqlcv1PcUyOWm+MR0OK9bW7TLEntMSF8 HOEe9j6SsjQNIOoYh1YcMnuGjKNIZjl2L3VbDzpVN2GxZxwAutY6G68tV7sbA2pu wtsrAVOqdcjoo0ruRmwognBqQAdNdsbiBx7bgcNjVEXWL0N3Ddiv6CNYwnehA5Hq cvQwVQpFGP9ZGYUcCMbdwR+7kJzVy6V2S615M8GkE9FouOwTfV60zM/sZ1rFVt1J 70zxfRX5ys19aTAVkbi6pHHCUJ0ZAiTgWujp5Hp4kPt7gEz01Ur0s1kI3b7b6iWh cuycRFULvqeXCApQacs//lOVDoUV20uFcL/zqOFM33v/+YzkyjA= =3D8z -----END PGP SIGNATURE----- Merge 4.19.103 into android-4.19 Changes in 4.19.103 Revert "drm/sun4i: dsi: Change the start delay calculation" ovl: fix lseek overflow on 32bit kernel/module: Fix memleak in module_add_modinfo_attrs() media: iguanair: fix endpoint sanity check ocfs2: fix oops when writing cloned file x86/cpu: Update cached HLE state on write to TSX_CTRL_CPUID_CLEAR udf: Allow writing to 'Rewritable' partitions printk: fix exclusive_console replaying iwlwifi: mvm: fix NVM check for 3168 devices sparc32: fix struct ipc64_perm type definition cls_rsvp: fix rsvp_policy gtp: use __GFP_NOWARN to avoid memalloc warning l2tp: Allow duplicate session creation with UDP net: hsr: fix possible NULL deref in hsr_handle_frame() net_sched: fix an OOB access in cls_tcindex net: stmmac: Delete txtimer in suspend() bnxt_en: Fix TC queue mapping. 
tcp: clear tp->total_retrans in tcp_disconnect() tcp: clear tp->delivered in tcp_disconnect() tcp: clear tp->data_segs{in|out} in tcp_disconnect() tcp: clear tp->segs_{in|out} in tcp_disconnect() rxrpc: Fix use-after-free in rxrpc_put_local() rxrpc: Fix insufficient receive notification generation rxrpc: Fix missing active use pinning of rxrpc_local object rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect media: uvcvideo: Avoid cyclic entity chains due to malformed USB descriptors mfd: dln2: More sanity checking for endpoints ipc/msg.c: consolidate all xxxctl_down() functions tracing: Fix sched switch start/stop refcount racy updates rcu: Avoid data-race in rcu_gp_fqs_check_wake() brcmfmac: Fix memory leak in brcmf_usbdev_qinit usb: typec: tcpci: mask event interrupts when remove driver usb: gadget: legacy: set max_speed to super-speed usb: gadget: f_ncm: Use atomic_t to track in-flight request usb: gadget: f_ecm: Use atomic_t to track in-flight request ALSA: usb-audio: Fix endianess in descriptor validation ALSA: dummy: Fix PCM format loop in proc output mm/memory_hotplug: fix remove_memory() lockdep splat mm: move_pages: report the number of non-attempted pages media/v4l2-core: set pages dirty upon releasing DMA buffers media: v4l2-core: compat: ignore native command codes media: v4l2-rect.h: fix v4l2_rect_map_inside() top/left adjustments lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more() irqdomain: Fix a memory leak in irq_domain_push_irq() platform/x86: intel_scu_ipc: Fix interrupt support ALSA: hda: Add Clevo W65_67SB the power_save blacklist KVM: arm64: Correct PSTATE on exception entry KVM: arm/arm64: Correct CPSR on exception entry KVM: arm/arm64: Correct AArch32 SPSR on exception entry KVM: arm64: Only sign-extend MMIO up to register width MIPS: fix indentation of the 'RELOCS' message MIPS: boot: fix typo in 'vmlinux.lzma.its' target s390/mm: fix dynamic pagetable upgrade for hugetlbfs powerpc/xmon: don't access 
ASDR in VMs powerpc/pseries: Advance pfn if section is not present in lmb_is_removable() smb3: fix signing verification of large reads PCI: tegra: Fix return value check of pm_runtime_get_sync() mmc: spi: Toggle SPI polarity, do not hardcode it ACPI: video: Do not export a non working backlight interface on MSI MS-7721 boards ACPI / battery: Deal with design or full capacity being reported as -1 ACPI / battery: Use design-cap for capacity calculations if full-cap is not available ACPI / battery: Deal better with neither design nor full capacity not being reported alarmtimer: Unregister wakeup source when module get fails ubifs: Reject unsupported ioctl flags explicitly ubifs: don't trigger assertion on invalid no-key filename ubifs: Fix FS_IOC_SETFLAGS unexpectedly clearing encrypt flag ubifs: Fix deadlock in concurrent bulk-read and writepage crypto: geode-aes - convert to skcipher API and make thread-safe PCI: keystone: Fix link training retries initiation mmc: sdhci-of-at91: fix memleak on clk_get failure hv_balloon: Balloon up according to request page number mfd: axp20x: Mark AXP20X_VBUS_IPSOUT_MGMT as volatile crypto: api - Check spawn->alg under lock in crypto_drop_spawn crypto: ccree - fix backlog memory leak crypto: ccree - fix pm wrongful error reporting crypto: ccree - fix PM race condition scripts/find-unused-docs: Fix massive false positives scsi: qla2xxx: Fix mtcp dump collection failure power: supply: ltc2941-battery-gauge: fix use-after-free ovl: fix wrong WARN_ON() in ovl_cache_update_ino() f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project() f2fs: fix miscounted block limit in f2fs_statfs_project() f2fs: code cleanup for f2fs_statfs_project() PM: core: Fix handling of devices deleted during system-wide resume of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc dm zoned: support zone sizes smaller than 128MiB dm space map common: fix to ensure new block isn't already in use dm crypt: fix benbi IV constructor 
crash if used in authenticated mode dm: fix potential for q->make_request_fn NULL pointer dm writecache: fix incorrect flush sequence when doing SSD mode commit padata: Remove broken queue flushing tracing: Annotate ftrace_graph_hash pointer with __rcu tracing: Annotate ftrace_graph_notrace_hash pointer with __rcu ftrace: Add comment to why rcu_dereference_sched() is open coded ftrace: Protect ftrace_graph_hash with ftrace_sync samples/bpf: Don't try to remove user's homedir on clean crypto: ccp - set max RSA modulus size for v3 platform devices as well crypto: pcrypt - Do not clear MAY_SLEEP flag in original request crypto: atmel-aes - Fix counter overflow in CTR mode crypto: api - Fix race condition in crypto_spawn_alg crypto: picoxcell - adjust the position of tasklet_init and fix missed tasklet_kill scsi: qla2xxx: Fix unbound NVME response length NFS: Fix memory leaks and corruption in readdir NFS: Directory page cache pages need to be locked when read jbd2_seq_info_next should increase position index Btrfs: fix missing hole after hole punching and fsync when using NO_HOLES btrfs: set trans->drity in btrfs_commit_transaction Btrfs: fix race between adding and putting tree mod seq elements and nodes ARM: tegra: Enable PLLP bypass during Tegra124 LP1 iwlwifi: don't throw error when trying to remove IGTK mwifiex: fix unbalanced locking in mwifiex_process_country_ie() sunrpc: expiry_time should be seconds not timeval gfs2: move setting current->backing_dev_info gfs2: fix O_SYNC write handling drm/rect: Avoid division by zero media: rc: ensure lirc is initialized before registering input device tools/kvm_stat: Fix kvm_exit filter name xen/balloon: Support xend-based toolstack take two watchdog: fix UAF in reboot notifier handling in watchdog core code bcache: add readahead cache policy options via sysfs interface eventfd: track eventfd_signal() recursion depth aio: prevent potential eventfd recursion on poll KVM: x86: Refactor picdev_write() to prevent 
Spectre-v1/L1TF attacks KVM: x86: Refactor prefix decoding to prevent Spectre-v1/L1TF attacks KVM: x86: Protect pmu_intel.c from Spectre-v1/L1TF attacks KVM: x86: Protect DR-based index computations from Spectre-v1/L1TF attacks KVM: x86: Protect kvm_lapic_reg_write() from Spectre-v1/L1TF attacks KVM: x86: Protect kvm_hv_msr_[get|set]_crash_data() from Spectre-v1/L1TF attacks KVM: x86: Protect ioapic_write_indirect() from Spectre-v1/L1TF attacks KVM: x86: Protect MSR-based index computations in pmu.h from Spectre-v1/L1TF attacks KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks KVM: x86: Protect MSR-based index computations from Spectre-v1/L1TF attacks in x86.c KVM: x86: Protect x86_decode_insn from Spectre-v1/L1TF attacks KVM: x86: Protect MSR-based index computations in fixed_msr_to_seg_unit() from Spectre-v1/L1TF attacks KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform KVM: PPC: Book3S HV: Uninit vCPU if vcore creation fails KVM: PPC: Book3S PR: Free shared page if mmu initialization fails x86/kvm: Be careful not to clear KVM_VCPU_FLUSH_TLB bit KVM: x86: Don't let userspace set host-reserved cr4 bits KVM: x86: Free wbinvd_dirty_mask if vCPU creation fails KVM: s390: do not clobber registers during guest reset/store status clk: tegra: Mark fuse clock as critical drm/amd/dm/mst: Ignore payload update failures percpu: Separate decrypted varaibles anytime encryption can be enabled scsi: qla2xxx: Fix the endianness of the qla82xx_get_fw_size() return type scsi: csiostor: Adjust indentation in csio_device_reset scsi: qla4xxx: Adjust indentation in qla4xxx_mem_free scsi: ufs: Recheck bkops level if bkops is disabled phy: qualcomm: Adjust indentation in read_poll_timeout ext2: Adjust indentation in ext2_fill_super powerpc/44x: Adjust indentation in ibm4xx_denali_fixup_memsize drm: msm: mdp4: Adjust indentation in mdp4_dsi_encoder_enable NFC: pn544: Adjust indentation in pn544_hci_check_presence ppp: Adjust indentation into 
ppp_async_input net: smc911x: Adjust indentation in smc911x_phy_configure net: tulip: Adjust indentation in {dmfe, uli526x}_init_module IB/mlx5: Fix outstanding_pi index for GSI qps IB/core: Fix ODP get user pages flow nfsd: fix delay timer on 32-bit architectures nfsd: fix jiffies/time_t mixup in LRU list nfsd: Return the correct number of bytes written to the file ubi: fastmap: Fix inverted logic in seen selfcheck ubi: Fix an error pointer dereference in error handling code mfd: da9062: Fix watchdog compatible string mfd: rn5t618: Mark ADC control register volatile bonding/alb: properly access headers in bond_alb_xmit() net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port net: mvneta: move rx_dropped and rx_errors in per-cpu stats net_sched: fix a resource leak in tcindex_set_parms() net: systemport: Avoid RBUF stuck in Wake-on-LAN mode net/mlx5: IPsec, Fix esp modify function attribute net/mlx5: IPsec, fix memory leak at mlx5_fpga_ipsec_delete_sa_ctx net: macb: Remove unnecessary alignment check for TSO net: macb: Limit maximum GEM TX length in TSO net: dsa: b53: Always use dev->vlan_enabled in b53_configure_vlan() ext4: fix deadlock allocating crypto bounce page from mempool btrfs: use bool argument in free_root_pointers() btrfs: free block groups after free'ing fs trees drm: atmel-hlcdc: enable clock before configuring timing engine drm/dp_mst: Remove VCPI while disabling topology mgr btrfs: flush write bio if we loop in extent_write_cache_pages KVM: x86/mmu: Apply max PA check for MMIO sptes to 32-bit KVM KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM KVM: VMX: Add non-canonical check on writes to RTIT address MSRs KVM: nVMX: vmread should not set rflags to specify success in case of #PF KVM: Use vcpu-specific gva->hva translation when querying host page size KVM: Play nice with read-only memslots when querying host page size mm: zero remaining unavailable struct pages mm: return zero_resv_unavail optimization mm/page_alloc.c: fix 
uninitialized memmaps on a partially populated last section cifs: fail i/o on soft mounts if sessionsetup errors out x86/apic/msi: Plug non-maskable MSI affinity race clocksource: Prevent double add_timer_on() for watchdog_timer perf/core: Fix mlock accounting in perf_mmap() rxrpc: Fix service call disconnection Linux 4.19.103 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I0d7f09085c3541373e0fd6b2e3ffacc5e34f7d55
537 lines
14 KiB
C
537 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/ext4/page-io.c
|
|
*
|
|
* This contains the new page_io functions for ext4
|
|
*
|
|
* Written by Theodore Ts'o, 2010.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/time.h>
|
|
#include <linux/highuid.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/quotaops.h>
|
|
#include <linux/string.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/backing-dev.h>
|
|
|
|
#include "ext4_jbd2.h"
|
|
#include "xattr.h"
|
|
#include "acl.h"
|
|
|
|
static struct kmem_cache *io_end_cachep;
|
|
|
|
int __init ext4_init_pageio(void)
|
|
{
|
|
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
|
|
if (io_end_cachep == NULL)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
void ext4_exit_pageio(void)
|
|
{
|
|
kmem_cache_destroy(io_end_cachep);
|
|
}
|
|
|
|
/*
|
|
* Print an buffer I/O error compatible with the fs/buffer.c. This
|
|
* provides compatibility with dmesg scrapers that look for a specific
|
|
* buffer I/O error message. We really need a unified error reporting
|
|
* structure to userspace ala Digital Unix's uerf system, but it's
|
|
* probably not going to happen in my lifetime, due to LKML politics...
|
|
*/
|
|
static void buffer_io_error(struct buffer_head *bh)
|
|
{
|
|
printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n",
|
|
bh->b_bdev,
|
|
(unsigned long long)bh->b_blocknr);
|
|
}
|
|
|
|
static void ext4_finish_bio(struct bio *bio)
|
|
{
|
|
int i;
|
|
struct bio_vec *bvec;
|
|
|
|
bio_for_each_segment_all(bvec, bio, i) {
|
|
struct page *page = bvec->bv_page;
|
|
struct page *bounce_page = NULL;
|
|
struct buffer_head *bh, *head;
|
|
unsigned bio_start = bvec->bv_offset;
|
|
unsigned bio_end = bio_start + bvec->bv_len;
|
|
unsigned under_io = 0;
|
|
unsigned long flags;
|
|
|
|
if (!page)
|
|
continue;
|
|
|
|
if (fscrypt_is_bounce_page(page)) {
|
|
bounce_page = page;
|
|
page = fscrypt_pagecache_page(bounce_page);
|
|
}
|
|
|
|
if (bio->bi_status) {
|
|
SetPageError(page);
|
|
mapping_set_error(page->mapping, -EIO);
|
|
}
|
|
bh = head = page_buffers(page);
|
|
/*
|
|
* We check all buffers in the page under BH_Uptodate_Lock
|
|
* to avoid races with other end io clearing async_write flags
|
|
*/
|
|
local_irq_save(flags);
|
|
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
|
do {
|
|
if (bh_offset(bh) < bio_start ||
|
|
bh_offset(bh) + bh->b_size > bio_end) {
|
|
if (buffer_async_write(bh))
|
|
under_io++;
|
|
continue;
|
|
}
|
|
clear_buffer_async_write(bh);
|
|
if (bio->bi_status)
|
|
buffer_io_error(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
|
local_irq_restore(flags);
|
|
if (!under_io) {
|
|
fscrypt_free_bounce_page(bounce_page);
|
|
end_page_writeback(page);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Free @io_end after finishing every bio still chained on it.
 *
 * The bios hang off io_end->bio and are linked through bi_private.  The
 * io_end must already be off any list, must no longer carry the UNWRITTEN
 * flag, and is expected to have no handle attached.
 */
static void ext4_release_io_end(ext4_io_end_t *io_end)
{
	struct bio *bio;

	BUG_ON(!list_empty(&io_end->list));
	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
	WARN_ON(io_end->handle);

	bio = io_end->bio;
	while (bio) {
		/* Fetch the link first: bio_put() may free the bio. */
		struct bio *next_bio = bio->bi_private;

		ext4_finish_bio(bio);
		bio_put(bio);
		bio = next_bio;
	}
	kmem_cache_free(io_end_cachep, io_end);
}
|
|
|
|
/*
|
|
* Check a range of space and convert unwritten extents to written. Note that
|
|
* we are protected from truncate touching same part of extent tree by the
|
|
* fact that truncate code waits for all DIO to finish (thus exclusion from
|
|
* direct IO is achieved) and also waits for PageWriteback bits. Thus we
|
|
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
|
|
* completed (happens from ext4_free_ioend()).
|
|
*/
|
|
static int ext4_end_io(ext4_io_end_t *io)
|
|
{
|
|
struct inode *inode = io->inode;
|
|
loff_t offset = io->offset;
|
|
ssize_t size = io->size;
|
|
handle_t *handle = io->handle;
|
|
int ret = 0;
|
|
|
|
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
|
"list->prev 0x%p\n",
|
|
io, inode->i_ino, io->list.next, io->list.prev);
|
|
|
|
io->handle = NULL; /* Following call will use up the handle */
|
|
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
|
|
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
|
|
ext4_msg(inode->i_sb, KERN_EMERG,
|
|
"failed to convert unwritten extents to written "
|
|
"extents -- potential data loss! "
|
|
"(inode %lu, offset %llu, size %zd, error %d)",
|
|
inode->i_ino, offset, size, ret);
|
|
}
|
|
ext4_clear_io_unwritten_flag(io);
|
|
ext4_release_io_end(io);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Debug helper: print every io_end queued on @head together with its list
 * neighbours.  Compiles to an empty function unless EXT4FS_DEBUG is defined.
 */
static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef EXT4FS_DEBUG
	ext4_io_end_t *io;

	if (list_empty(head))
		return;

	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
	list_for_each_entry(io, head, list) {
		ext4_io_end_t *prev_io = container_of(io->list.prev,
						      ext4_io_end_t, list);
		ext4_io_end_t *next_io = container_of(io->list.next,
						      ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   io, inode->i_ino, prev_io, next_io);
	}
#endif
}
|
|
|
|
/* Add the io_end to per-inode completed end_io list. */
|
|
static void ext4_add_complete_io(ext4_io_end_t *io_end)
|
|
{
|
|
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
|
struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
|
|
struct workqueue_struct *wq;
|
|
unsigned long flags;
|
|
|
|
/* Only reserved conversions from writeback should enter here */
|
|
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
|
WARN_ON(!io_end->handle && sbi->s_journal);
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
wq = sbi->rsv_conversion_wq;
|
|
if (list_empty(&ei->i_rsv_conversion_list))
|
|
queue_work(wq, &ei->i_rsv_conversion_work);
|
|
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
}
|
|
|
|
static int ext4_do_flush_completed_IO(struct inode *inode,
|
|
struct list_head *head)
|
|
{
|
|
ext4_io_end_t *io;
|
|
struct list_head unwritten;
|
|
unsigned long flags;
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
int err, ret = 0;
|
|
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
dump_completed_IO(inode, head);
|
|
list_replace_init(head, &unwritten);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
|
|
while (!list_empty(&unwritten)) {
|
|
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
|
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
|
list_del_init(&io->list);
|
|
|
|
err = ext4_end_io(io);
|
|
if (unlikely(!ret && err))
|
|
ret = err;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* work on completed IO, to convert unwritten extents to extents
|
|
*/
|
|
void ext4_end_io_rsv_work(struct work_struct *work)
|
|
{
|
|
struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
|
|
i_rsv_conversion_work);
|
|
ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
|
|
}
|
|
|
|
/*
 * Allocate a zeroed io_end for @inode using allocation flags @flags.
 *
 * The new io_end starts with a reference count of 1 and an empty list node.
 * Returns NULL when the allocation fails.
 */
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);

	if (!io_end)
		return NULL;

	io_end->inode = inode;
	INIT_LIST_HEAD(&io_end->list);
	atomic_set(&io_end->count, 1);

	return io_end;
}
|
|
|
|
/*
 * Drop one reference on @io_end, deferring any extent conversion.
 *
 * When the last reference goes away, an io_end that is flagged UNWRITTEN and
 * has a non-zero size is queued through ext4_add_complete_io(); any other
 * io_end is released right away.
 */
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
	if (!atomic_dec_and_test(&io_end->count))
		return;

	if ((io_end->flag & EXT4_IO_END_UNWRITTEN) && io_end->size)
		ext4_add_complete_io(io_end);
	else
		ext4_release_io_end(io_end);
}
|
|
|
|
/*
 * Drop one reference on @io_end, converting unwritten extents in place.
 *
 * When the last reference goes away and the io_end is flagged UNWRITTEN, the
 * conversion is performed here with the io_end's handle before the io_end is
 * released.
 *
 * Returns the result of ext4_convert_unwritten_extents() when a conversion
 * was performed, otherwise 0.
 */
int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (!atomic_dec_and_test(&io_end->count))
		return 0;

	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
		err = ext4_convert_unwritten_extents(io_end->handle,
						     io_end->inode,
						     io_end->offset,
						     io_end->size);
		io_end->handle = NULL;
		ext4_clear_io_unwritten_flag(io_end);
	}
	ext4_release_io_end(io_end);

	return err;
}
|
|
|
|
/* Take an extra reference on @io_end and hand the same pointer back. */
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
	atomic_inc(&io_end->count);

	return io_end;
}
|
|
|
|
/* BIO completion function for page writeback */
|
|
static void ext4_end_bio(struct bio *bio)
|
|
{
|
|
ext4_io_end_t *io_end = bio->bi_private;
|
|
sector_t bi_sector = bio->bi_iter.bi_sector;
|
|
char b[BDEVNAME_SIZE];
|
|
|
|
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
|
|
bio_devname(bio, b),
|
|
(long long) bio->bi_iter.bi_sector,
|
|
(unsigned) bio_sectors(bio),
|
|
bio->bi_status)) {
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
return;
|
|
}
|
|
bio->bi_end_io = NULL;
|
|
|
|
if (bio->bi_status) {
|
|
struct inode *inode = io_end->inode;
|
|
|
|
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
|
|
"(offset %llu size %ld starting block %llu)",
|
|
bio->bi_status, inode->i_ino,
|
|
(unsigned long long) io_end->offset,
|
|
(long) io_end->size,
|
|
(unsigned long long)
|
|
bi_sector >> (inode->i_blkbits - 9));
|
|
mapping_set_error(inode->i_mapping,
|
|
blk_status_to_errno(bio->bi_status));
|
|
}
|
|
|
|
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
|
/*
|
|
* Link bio into list hanging from io_end. We have to do it
|
|
* atomically as bio completions can be racing against each
|
|
* other.
|
|
*/
|
|
bio->bi_private = xchg(&io_end->bio, bio);
|
|
ext4_put_io_end_defer(io_end);
|
|
} else {
|
|
/*
|
|
* Drop io_end reference early. Inode can get freed once
|
|
* we finish the bio.
|
|
*/
|
|
ext4_put_io_end_defer(io_end);
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
}
|
|
}
|
|
|
|
void ext4_io_submit(struct ext4_io_submit *io)
|
|
{
|
|
struct bio *bio = io->io_bio;
|
|
|
|
if (bio) {
|
|
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
|
|
REQ_SYNC : 0;
|
|
io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
|
|
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
|
|
submit_bio(io->io_bio);
|
|
}
|
|
io->io_bio = NULL;
|
|
}
|
|
|
|
void ext4_io_submit_init(struct ext4_io_submit *io,
|
|
struct writeback_control *wbc)
|
|
{
|
|
io->io_wbc = wbc;
|
|
io->io_bio = NULL;
|
|
io->io_end = NULL;
|
|
}
|
|
|
|
static int io_submit_init_bio(struct ext4_io_submit *io,
|
|
struct buffer_head *bh)
|
|
{
|
|
struct bio *bio;
|
|
|
|
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
|
if (!bio)
|
|
return -ENOMEM;
|
|
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
|
|
wbc_init_bio(io->io_wbc, bio);
|
|
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
|
bio_set_dev(bio, bh->b_bdev);
|
|
bio->bi_end_io = ext4_end_bio;
|
|
bio->bi_private = ext4_get_io_end(io->io_end);
|
|
io->io_bio = bio;
|
|
io->io_next_block = bh->b_blocknr;
|
|
return 0;
|
|
}
|
|
|
|
static int io_submit_add_bh(struct ext4_io_submit *io,
|
|
struct inode *inode,
|
|
struct page *page,
|
|
struct buffer_head *bh)
|
|
{
|
|
int ret;
|
|
|
|
if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
|
|
!fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
|
|
submit_and_retry:
|
|
ext4_io_submit(io);
|
|
}
|
|
if (io->io_bio == NULL) {
|
|
ret = io_submit_init_bio(io, bh);
|
|
if (ret)
|
|
return ret;
|
|
io->io_bio->bi_write_hint = inode->i_write_hint;
|
|
}
|
|
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
|
|
if (ret != bh->b_size)
|
|
goto submit_and_retry;
|
|
wbc_account_io(io->io_wbc, page, bh->b_size);
|
|
io->io_next_block++;
|
|
return 0;
|
|
}
|
|
|
|
int ext4_bio_write_page(struct ext4_io_submit *io,
|
|
struct page *page,
|
|
int len,
|
|
struct writeback_control *wbc,
|
|
bool keep_towrite)
|
|
{
|
|
struct page *bounce_page = NULL;
|
|
struct inode *inode = page->mapping->host;
|
|
unsigned block_start;
|
|
struct buffer_head *bh, *head;
|
|
int ret = 0;
|
|
int nr_submitted = 0;
|
|
int nr_to_submit = 0;
|
|
|
|
BUG_ON(!PageLocked(page));
|
|
BUG_ON(PageWriteback(page));
|
|
|
|
if (keep_towrite)
|
|
set_page_writeback_keepwrite(page);
|
|
else
|
|
set_page_writeback(page);
|
|
ClearPageError(page);
|
|
|
|
/*
|
|
* Comments copied from block_write_full_page:
|
|
*
|
|
* The page straddles i_size. It must be zeroed out on each and every
|
|
* writepage invocation because it may be mmapped. "A file is mapped
|
|
* in multiples of the page size. For a file that is not a multiple of
|
|
* the page size, the remaining memory is zeroed when mapped, and
|
|
* writes to that region are not written out to the file."
|
|
*/
|
|
if (len < PAGE_SIZE)
|
|
zero_user_segment(page, len, PAGE_SIZE);
|
|
/*
|
|
* In the first loop we prepare and mark buffers to submit. We have to
|
|
* mark all buffers in the page before submitting so that
|
|
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
|
|
* on the first buffer finishes and we are still working on submitting
|
|
* the second buffer.
|
|
*/
|
|
bh = head = page_buffers(page);
|
|
do {
|
|
block_start = bh_offset(bh);
|
|
if (block_start >= len) {
|
|
clear_buffer_dirty(bh);
|
|
set_buffer_uptodate(bh);
|
|
continue;
|
|
}
|
|
if (!buffer_dirty(bh) || buffer_delay(bh) ||
|
|
!buffer_mapped(bh) || buffer_unwritten(bh)) {
|
|
/* A hole? We can safely clear the dirty bit */
|
|
if (!buffer_mapped(bh))
|
|
clear_buffer_dirty(bh);
|
|
if (io->io_bio)
|
|
ext4_io_submit(io);
|
|
continue;
|
|
}
|
|
if (buffer_new(bh)) {
|
|
clear_buffer_new(bh);
|
|
clean_bdev_bh_alias(bh);
|
|
}
|
|
set_buffer_async_write(bh);
|
|
nr_to_submit++;
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
bh = head = page_buffers(page);
|
|
|
|
if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
|
|
gfp_t gfp_flags = GFP_NOFS;
|
|
|
|
/*
|
|
* Since bounce page allocation uses a mempool, we can only use
|
|
* a waiting mask (i.e. request guaranteed allocation) on the
|
|
* first page of the bio. Otherwise it can deadlock.
|
|
*/
|
|
if (io->io_bio)
|
|
gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
|
|
retry_encrypt:
|
|
bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE,
|
|
0, gfp_flags);
|
|
if (IS_ERR(bounce_page)) {
|
|
ret = PTR_ERR(bounce_page);
|
|
if (ret == -ENOMEM &&
|
|
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
|
|
gfp_flags = GFP_NOFS;
|
|
if (io->io_bio)
|
|
ext4_io_submit(io);
|
|
else
|
|
gfp_flags |= __GFP_NOFAIL;
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
goto retry_encrypt;
|
|
}
|
|
bounce_page = NULL;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* Now submit buffers to write */
|
|
do {
|
|
if (!buffer_async_write(bh))
|
|
continue;
|
|
ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
|
|
if (ret) {
|
|
/*
|
|
* We only get here on ENOMEM. Not much else
|
|
* we can do but mark the page as dirty, and
|
|
* better luck next time.
|
|
*/
|
|
break;
|
|
}
|
|
nr_submitted++;
|
|
clear_buffer_dirty(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
/* Error stopped previous loop? Clean up buffers... */
|
|
if (ret) {
|
|
out:
|
|
fscrypt_free_bounce_page(bounce_page);
|
|
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
|
|
redirty_page_for_writepage(wbc, page);
|
|
do {
|
|
clear_buffer_async_write(bh);
|
|
bh = bh->b_this_page;
|
|
} while (bh != head);
|
|
}
|
|
unlock_page(page);
|
|
/* Nothing submitted - we have to end page writeback */
|
|
if (!nr_submitted)
|
|
end_page_writeback(page);
|
|
return ret;
|
|
}
|