* refs/heads/tmp-ead6fb7: Revert "spi: uniphier: fix incorrect property items" Linux 4.19.87 PM / devfreq: Fix kernel oops on governor module load KVM: PPC: Book3S HV: Flush link stack on guest exit to host kernel powerpc/book3s64: Fix link stack flush on context switch powerpc/64s: support nospectre_v2 cmdline option staging: comedi: usbduxfast: usbduxfast_ai_cmdtest rounding error USB: serial: option: add support for Foxconn T77W968 LTE modules USB: serial: option: add support for DW5821e with eSIM support USB: serial: mos7840: fix remote wakeup USB: serial: mos7720: fix remote wakeup USB: serial: mos7840: add USB ID to support Moxa UPort 2210 appledisplay: fix error handling in the scheduled work USB: chaoskey: fix error case of a timeout usb-serial: cp201x: support Mark-10 digital force gauge usbip: Fix uninitialized symbol 'nents' in stub_recv_cmd_submit() usbip: tools: fix fd leakage in the function of read_attr_usbip_status USBIP: add config dependency for SGL_ALLOC virtio_ring: fix return code on DMA mapping fails media: imon: invalid dereference in imon_touch_event media: cxusb: detect cxusb_ctrl_msg error in query media: b2c2-flexcop-usb: add sanity checking media: uvcvideo: Fix error path in control parsing failure cpufreq: Add NULL checks to show() and store() methods of cpufreq media: usbvision: Fix races among open, close, and disconnect media: vivid: Fix wrong locking that causes race conditions on streaming stop media: vivid: Set vid_cap_streaming and vid_out_streaming to true nfc: port100: handle command failure cleanly ALSA: usb-audio: Fix NULL dereference at parsing BADD futex: Prevent robust futex exit race y2038: futex: Move compat implementation into futex.c nbd: prevent memory leak x86/speculation: Fix redundant MDS mitigation message x86/speculation: Fix incorrect MDS/TAA mitigation status x86/insn: Fix awk regexp warnings ARC: perf: Accommodate big-endian CPU ARM: 8904/1: skip nomap memblocks while finding the lowmem/highmem boundary ocfs2: 
remove ocfs2_is_o2cb_active() net: phy: dp83867: increase SGMII autoneg timer duration net: phy: dp83867: fix speed 10 in sgmii mode mm/memory_hotplug: don't access uninitialized memmaps in shrink_zone_span() md/raid10: prevent access of uninitialized resync_pages offset ath9k_hw: fix uninitialized variable data ath10k: Fix a NULL-ptr-deref bug in ath10k_usb_alloc_urb_from_pipe KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved Bluetooth: Fix invalid-free in bcsp_close() mm/page_io.c: do not free shared swap slots cfg80211: call disconnect_wk when AP stops ipv6: Fix handling of LLA with VRF and sockets bound to VRF mm/memory_hotplug: Do not unlock when fails to take the device_hotplug_lock i2c: uniphier-f: fix timeout error after reading 8 bytes spi: omap2-mcspi: Fix DMA and FIFO event trigger size mismatch nvme-pci: fix surprise removal PCI: keystone: Use quirk to limit MRRS for K2G pinctrl: zynq: Use define directive for PIN_CONFIG_IO_STANDARD pinctrl: lpc18xx: Use define directive for PIN_CONFIG_GPIO_PIN_INT pinctrl: bcm2835: Use define directive for BCM2835_PINCONF_PARAM_PULL pinctrl: qcom: spmi-gpio: fix gpio-hog related boot issues cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces tools: bpftool: pass an argument to silence open_obj_pinned() of: unittest: initialize args before calling of_*parse_*() of: unittest: allow base devicetree to have symbol metadata net: bcmgenet: return correct value 'ret' from bcmgenet_power_down ACPICA: Use %d for signed int print formatting instead of %u clk: tegra20: Turn EMC clock gate into divider vrf: mark skb for multicast or link-local as enslaved to VRF dlm: don't leak kernel pointer to userspace dlm: fix invalid free usb: typec: tcpm: charge current handling for sink during hard reset scsi: lpfc: Correct loss of fc4 type on remote port address change scsi: lpfc: Fix odd recovery in duplicate FLOGIs in point-to-point scsi: lpfc: fcoe: Fix link down issue after 1000+ link bounces 
scsi: megaraid_sas: Fix goto labels in error handling scsi: megaraid_sas: Fix msleep granularity scsi: mpt3sas: Fix driver modifying persistent data in Manufacturing page11 scsi: mpt3sas: Don't modify EEDPTagMode field setting on SAS3.5 HBA devices scsi: mpt3sas: Fix Sync cache command failure during driver unload net: dsa: bcm_sf2: Turn on PHY to allow successful registration rtlwifi: rtl8192de: Fix misleading REG_MCUFWDL information wireless: airo: potential buffer overflow in sprintf() brcmsmac: never log "tid x is not agg'able" by default rtl8xxxu: Fix missing break in switch wlcore: Fix the return value in case of error in 'wlcore_vendor_cmd_smart_config_start()' ath10k: snoc: fix unbalanced clock error handling wil6210: fix locking in wmi_call wil6210: fix RGF_CAF_ICR address for Talyn-MB wil6210: fix L2 RX status handling wil6210: fix debugfs memory access alignment btrfs: avoid link error with CONFIG_NO_AUTO_INLINE media: ov13858: Check for possible null pointer nds32: Fix bug in bitfield.h net: bpfilter: fix iptables failure if bpfilter_umh is disabled sock_diag: fix autoloading of the raw_diag module audit: print empty EXECVE args soc: bcm: brcmstb: Fix re-entry point with a THUMB2_KERNEL clk: sunxi-ng: enable so-said LDOs for A64 SoC's pll-mipi clock ARM: dts: imx6sx-sdb: Fix enet phy regulator openvswitch: fix linking without CONFIG_NF_CONNTRACK_LABELS sched/fair: Don't increase sd->balance_interval on newidle balance sched/topology: Fix off by one bug net: do not abort bulk send on BQL status ocfs2: fix clusters leak in ocfs2_defrag_extent() ocfs2: don't put and assigning null to bh allocated outside ocfs2: don't use iocb when EIOCBQUEUED returns ocfs2: without quota support, avoid calling quota recovery mm: handle no memcg case in memcg_kmem_charge() properly tools/power turbosat: fix AMD APIC-id output arm64: makefile fix build of .i file in external module case nvme-pci: fix conflicting p2p resource adds irq/matrix: Fix memory overallocation ntb: 
intel: fix return value for ndev_vec_mask() ntb_netdev: fix sleep time mismatch net: hns3: bugfix for hclge_mdio_write and hclge_mdio_read net: hns3: bugfix for is_valid_csq_clean_head() net: hns3: bugfix for reporting unknown vector0 interrupt repeatly problem net: hns3: bugfix for buffer not free problem during resetting fm10k: ensure completer aborts are marked as non-fatal after a resume igb: shorten maximum PHC timecounter update interval powerpc/powernv: hold device_hotplug_lock when calling device_online() mm/memory_hotplug: fix online/offline_pages called w.o. mem_hotplug_lock mm/memory_hotplug: make add_memory() take the device_hotplug_lock kernel/panic.c: do not append newline to the stack protector panic string fs/hfs/extent.c: fix array out of bounds read of array extent hfs: update timestamp on truncate() hfsplus: update timestamps on truncate() hfs: fix return value of hfs_get_block() hfsplus: fix return value of hfsplus_get_block() hfs: prevent btree data loss on ENOSPC hfsplus: prevent btree data loss on ENOSPC hfs: fix BUG on bnode parent update hfsplus: fix BUG on bnode parent update lib/bitmap.c: fix remaining space computation in bitmap_print_to_pagebuf linux/bitmap.h: fix type of nbits in bitmap_shift_right() linux/bitmap.h: handle constant zero-size bitmaps correctly mm/gup_benchmark.c: prevent integer overflow in ioctl block: call rq_qos_exit() after queue is frozen selftests/powerpc/cache_shape: Fix out-of-tree build selftests/powerpc/switch_endian: Fix out-of-tree build selftests/powerpc/signal: Fix out-of-tree build selftests/powerpc/ptrace: Fix out-of-tree build powerpc/xmon: Relax frame size for clang ipv4/igmp: fix v1/v2 switchback timeout based on rfc3376, 8.12 vfs: avoid problematic remapping requests into partial EOF block um: Make line/tty semantics use true write IRQ i2c: uniphier-f: fix race condition when IRQ is cleared i2c: uniphier-f: fix occasional timeout error i2c: uniphier-f: make driver robust against concurrency block: 
fix the DISCARD request merge macsec: let the administrator set UP state even if lowerdev is down macsec: update operstate when lower device changes mm: thp: fix MADV_DONTNEED vs migrate_misplaced_transhuge_page race condition tools/testing/selftests/vm/gup_benchmark.c: fix 'write' flag usage mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock fs/ocfs2/dlm/dlmdebug.c: fix a sleep-in-atomic-context bug in dlm_print_one_mle() arm64: lib: use C string functions with KASAN enabled sparc64: Rework xchg() definition to avoid warnings. powerpc/process: Fix flush_all_to_thread for SPE bpf, btf: fix a missing check bug in btf_parse bpf: devmap: fix wrong interface selection in notifier_call net: ethernet: cadence: fix socket buffer corruption problem thermal: rcar_thermal: Prevent hardware access during system suspend thermal: rcar_thermal: fix duplicate IRQ request selftests: fix warning: "_GNU_SOURCE" redefined selftests: kvm: Fix -Wformat warnings selftests: watchdog: Fix error message. 
selftests: watchdog: fix message when /dev/watchdog open fails selftests/ftrace: Fix to test kprobe $comm arg only if available spi: uniphier: fix incorrect property items fs/cifs: fix uninitialised variable warnings net: socionext: Stop PHY before resetting netsec mfd: max8997: Enale irq-wakeup unconditionally mfd: intel_soc_pmic_bxtwc: Chain power button IRQs as well mfd: mc13xxx-core: Fix PMIC shutdown when reading ADC values mfd: arizona: Correct calling of runtime_put_sync net: ethernet: ti: cpsw: unsync mcast entries while switch promisc mode qlcnic: fix a return in qlcnic_dcb_get_capability() mISDN: Fix type of switch control variable in ctrl_teimanager f2fs: spread f2fs_set_inode_flags() f2fs: fix to spread clear_cold_data() thermal: armada: fix a test in probe() RISC-V: Avoid corrupting the upper 32-bit of phys_addr_t in ioremap rtc: s35390a: Change buf's type to u8 in s35390a_init ceph: only allow punch hole mode in fallocate ceph: fix dentry leak in ceph_readdir_prepopulate tools: bpftool: fix completion for "bpftool map update" selftests/bpf: fix return value comparison for tests in test_libbpf.sh powerpc/64s/radix: Fix radix__flush_tlb_collapsed_pmd double flushing pmd powerpc/mm/radix: Fix small page at boundary when splitting powerpc/mm/radix: Fix overuse of small pages in splitting logic powerpc/mm/radix: Fix off-by-one in split mapping logic powerpc/pseries: Export raw per-CPU VPA data via debugfs scsi: hisi_sas: Fix NULL pointer dereference sparc: Fix parport build warnings. 
x86/intel_rdt: Prevent pseudo-locking from using stale pointers spi: omap2-mcspi: Set FIFO DMA trigger level to word length swiotlb: do not panic on mapping failures s390/perf: Return error when debug_register fails atm: zatm: Fix empty body Clang warnings sunrpc: safely reallow resvport min/max inversion SUNRPC: Fix a compile warning for cmpxchg64() selftests/bpf: fix file resource leak in load_kallsyms dm raid: avoid bitmap with raid4/5/6 journal device sctp: use sk_wmem_queued to check for writable space usbip: tools: fix atoi() on non-null terminated string USB: misc: appledisplay: fix backlight update_status return code PCI: vmd: Detach resources after stopping root bus macintosh/windfarm_smu_sat: Fix debug output ALSA: i2c/cs8427: Fix int to char conversion PM / Domains: Deal with multiple states but no governor in genpd ACPI / scan: Create platform device for INT33FE ACPI nodes kprobes, x86/ptrace.h: Make regs_get_kernel_stack_nth() not fault on bad stack xfs: clear ail delwri queued bufs on unmount of shutdown fs xfs: fix use-after-free race in xfs_buf_rele net: ena: Fix Kconfig dependency on X86 net: fix warning in af_unix net: dsa: mv88e6xxx: Fix 88E6141/6341 2500mbps SERDES speed scsi: zorro_esp: Limit DMA transfers to 65535 bytes scsi: dc395x: fix DMA API usage in sg_update_list scsi: dc395x: fix dma API usage in srb_done ASoC: tegra_sgtl5000: fix device_node refcounting clk: at91: audio-pll: fix audio pmc type clk: mmp2: fix the clock id for sdh2_clk and sdh3_clk PCI: mediatek: Fixup MSI enablement logic by enabling MSI before clocks nvme-pci: fix hot removal during error handling nvmet-fcloop: suppress a compiler warning nvmet: avoid integer overflow in the discard code crypto: ccree - avoid implicit enum conversion scsi: iscsi_tcp: Explicitly cast param in iscsi_sw_tcp_host_get_param scsi: bfa: Avoid implicit enum conversion in bfad_im_post_vendor_event scsi: isci: Change sci_controller_start_task's return type to sci_status scsi: isci: Use proper 
enumerated type in atapi_d2h_reg_frame_handler clk: tegra: Fixes for MBIST work around KVM/x86: Fix invvpid and invept register operand size in 64-bit mode KVM: nVMX: move check_vmentry_postreqs() call to nested_vmx_enter_non_root_mode() KVM: nVMX: reset cache/shadows when switching loaded VMCS nfp: bpf: protect against mis-initializing atomic counters scsi: ips: fix missing break in switch qed: Align local and global PTT to propagate through the APIs. amiflop: clean up on errors during setup pwm: lpss: Only set update bit if we are actually changing the settings pinctrl: sunxi: Fix a memory leak in 'sunxi_pinctrl_build_state()' RDMA/bnxt_re: Avoid resource leak in case the NQ registration fails RDMA/bnxt_re: Fix qp async event reporting RDMA/bnxt_re: Avoid NULL check after accessing the pointer scsi: hisi_sas: Free slot later in slot_complete_vx_hw() scsi: hisi_sas: Fix the race between IO completion and timeout for SMP/internal IO scsi: hisi_sas: Feed back linkrate(max/min) when re-attached m68k: fix command-line parsing when passed from u-boot w1: IAD Register is yet readable trough iad sys file. Fix snprintf (%u for unsigned, count for max size). 
misc: mic: fix a DMA pool free failure gsmi: Fix bug in append_to_eventlog sysfs handler btrfs: handle error of get_old_root btrfs: defrag: use btrfs_mod_outstanding_extents in cluster_pages_for_defrag PCI: mediatek: Fix class type for MT7622 to PCI_CLASS_BRIDGE_PCI mmc: mediatek: fix cannot receive new request when msdc_cmd_is_ready fail mmc: mediatek: fill the actual clock for mmc debugfs spi: sh-msiof: fix deferred probing cdrom: don't attempt to fiddle with cdo->capability skd: fixup usage of legacy IO API ath10k: allocate small size dma memory in ath10k_pci_diag_write_mem ath10k: set probe request oui during driver start brcmsmac: AP mode: update beacon when TIM changes mt76x0: phy: fix restore phase in mt76x0_phy_recalibrate_after_assoc mt76: do not store aggregation sequence number for null-data frames EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr() powerpc/eeh: Fix use of EEH_PE_KEEP on wrong field powerpc/eeh: Fix null deref for devices removed during EEH powerpc/boot: Disable vector instructions powerpc/boot: Fix opal console in boot wrapper powerpc: Fix signedness bug in update_flash_db() synclink_gt(): fix compat_ioctl() pty: fix compat ioctls gfs2: Fix marking bitmaps non-full PCI: cadence: Write MSI data with 32bits pinctrl: madera: Fix uninitialized variable bug in madera_mux_set_mux printk: fix integer overflow in setup_log_buf() printk: lock/unlock console only for new logbuf entries crypto: testmgr - fix sizeof() on COMP_BUF_SIZE ALSA: isight: fix leak of reference to firewire unit in error path of .probe callback mwifiex: Fix NL80211_TX_POWER_LIMITED drm/i915/userptr: Try to acquire the page lock around set_page_dirty() drm/i915/pmu: "Frequency" is reported as accumulated cycles drm/amd/powerplay: issue no PPSMC_MSG_GetCurrPkgPwr on unsupported ASICs mm/ksm.c: don't WARN if page is still mapped in remove_stable_node() Revert "fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()" virtio_console: allocate 
inbufs in add_port() only if it is needed nbd:fix memory leak in nbd_get_socket() tools: gpio: Correctly add make dependencies for gpio_utils gpio: max77620: Fixup debounce delays vhost/vsock: split packets to send using multiple buffers net/mlx5: Fix auto group size calculation net/mlxfw: Verify FSM error code translation doesn't exceed array size net/mlx5e: Fix set vf link state error flow sfc: Only cancel the PPS workqueue if it exists net: sched: ensure opts_len <= IP_TUNNEL_OPTS_MAX in act_tunnel_key net/sched: act_pedit: fix WARN() in the traffic path net: rtnetlink: prevent underflows in do_setvfinfo() net/mlx4_en: Fix wrong limitation for number of TX rings net/mlx4_en: fix mlx4 ethtool -N insertion mlxsw: spectrum_router: Fix determining underlay for a GRE tunnel Conflicts: block/blk-merge.c drivers/net/wireless/ath/wil6210/main.c drivers/pinctrl/qcom/pinctrl-spmi-gpio.c Change-Id: I2055f0bc1eb4ac6b7ade99e91f84bf2e4f4ea7c4 Signed-off-by: Ivaylo Georgiev <irgeorgiev@codeaurora.org>
436 lines
10 KiB
C
436 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/swapops.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/frontswap.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/sched/task.h>
|
|
#include <asm/pgtable.h>
|
|
|
|
/*
 * Build a bio that covers every subpage of @page.
 *
 * @gfp_flags: allocation flags passed straight to bio_alloc()
 * @page:      first page of the transfer; hpage_nr_pages(page) consecutive
 *             pages are added, PAGE_SIZE bytes each
 * @end_io:    completion callback stored in bio->bi_end_io
 *
 * The target device and start sector come from map_swap_page().
 *
 * Returns the prepared bio, or NULL when bio_alloc() fails.
 */
static struct bio *get_swap_bio(gfp_t gfp_flags,
				struct page *page, bio_end_io_t end_io)
{
	int idx, nr_subpages = hpage_nr_pages(page);
	struct block_device *bdev;
	struct bio *bio;

	bio = bio_alloc(gfp_flags, nr_subpages);
	if (!bio)
		return NULL;

	bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
	bio_set_dev(bio, bdev);
	/* Scale the value from map_swap_page() by PAGE_SIZE / 512. */
	bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
	bio->bi_end_io = end_io;

	for (idx = 0; idx < nr_subpages; idx++)
		bio_add_page(bio, page + idx, PAGE_SIZE, 0);

	/* Every subpage must have been accepted by bio_add_page(). */
	VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr_subpages);

	return bio;
}
|
|
|
|
void end_swap_bio_write(struct bio *bio)
|
|
{
|
|
struct page *page = bio_first_page_all(bio);
|
|
|
|
if (bio->bi_status) {
|
|
SetPageError(page);
|
|
/*
|
|
* We failed to write the page out to swap-space.
|
|
* Re-dirty the page in order to avoid it being reclaimed.
|
|
* Also print a dire warning that things will go BAD (tm)
|
|
* very quickly.
|
|
*
|
|
* Also clear PG_reclaim to avoid rotate_reclaimable_page()
|
|
*/
|
|
set_page_dirty(page);
|
|
pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
|
|
MAJOR(bio_dev(bio)),
|
|
MINOR(bio_dev(bio)),
|
|
(unsigned long long)bio->bi_iter.bi_sector);
|
|
ClearPageReclaim(page);
|
|
}
|
|
end_page_writeback(page);
|
|
bio_put(bio);
|
|
}
|
|
|
|
static void swap_slot_free_notify(struct page *page)
|
|
{
|
|
struct swap_info_struct *sis;
|
|
struct gendisk *disk;
|
|
swp_entry_t entry;
|
|
|
|
/*
|
|
* There is no guarantee that the page is in swap cache - the software
|
|
* suspend code (at least) uses end_swap_bio_read() against a non-
|
|
* swapcache page. So we must check PG_swapcache before proceeding with
|
|
* this optimization.
|
|
*/
|
|
if (unlikely(!PageSwapCache(page)))
|
|
return;
|
|
|
|
sis = page_swap_info(page);
|
|
if (!(sis->flags & SWP_BLKDEV))
|
|
return;
|
|
|
|
/*
|
|
* The swap subsystem performs lazy swap slot freeing,
|
|
* expecting that the page will be swapped out again.
|
|
* So we can avoid an unnecessary write if the page
|
|
* isn't redirtied.
|
|
* This is good for real swap storage because we can
|
|
* reduce unnecessary I/O and enhance wear-leveling
|
|
* if an SSD is used as the as swap device.
|
|
* But if in-memory swap device (eg zram) is used,
|
|
* this causes a duplicated copy between uncompressed
|
|
* data in VM-owned memory and compressed data in
|
|
* zram-owned memory. So let's free zram-owned memory
|
|
* and make the VM-owned decompressed page *dirty*,
|
|
* so the page should be swapped out somewhere again if
|
|
* we again wish to reclaim it.
|
|
*/
|
|
disk = sis->bdev->bd_disk;
|
|
entry.val = page_private(page);
|
|
if (disk->fops->swap_slot_free_notify &&
|
|
__swap_count(sis, entry) == 1) {
|
|
unsigned long offset;
|
|
|
|
offset = swp_offset(entry);
|
|
|
|
SetPageDirty(page);
|
|
disk->fops->swap_slot_free_notify(sis->bdev,
|
|
offset);
|
|
}
|
|
}
|
|
|
|
static void end_swap_bio_read(struct bio *bio)
|
|
{
|
|
struct page *page = bio_first_page_all(bio);
|
|
struct task_struct *waiter = bio->bi_private;
|
|
|
|
if (bio->bi_status) {
|
|
SetPageError(page);
|
|
ClearPageUptodate(page);
|
|
pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
|
|
MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
|
|
(unsigned long long)bio->bi_iter.bi_sector);
|
|
goto out;
|
|
}
|
|
|
|
SetPageUptodate(page);
|
|
swap_slot_free_notify(page);
|
|
out:
|
|
unlock_page(page);
|
|
WRITE_ONCE(bio->bi_private, NULL);
|
|
bio_put(bio);
|
|
wake_up_process(waiter);
|
|
put_task_struct(waiter);
|
|
}
|
|
|
|
int generic_swapfile_activate(struct swap_info_struct *sis,
|
|
struct file *swap_file,
|
|
sector_t *span)
|
|
{
|
|
struct address_space *mapping = swap_file->f_mapping;
|
|
struct inode *inode = mapping->host;
|
|
unsigned blocks_per_page;
|
|
unsigned long page_no;
|
|
unsigned blkbits;
|
|
sector_t probe_block;
|
|
sector_t last_block;
|
|
sector_t lowest_block = -1;
|
|
sector_t highest_block = 0;
|
|
int nr_extents = 0;
|
|
int ret;
|
|
|
|
blkbits = inode->i_blkbits;
|
|
blocks_per_page = PAGE_SIZE >> blkbits;
|
|
|
|
/*
|
|
* Map all the blocks into the extent list. This code doesn't try
|
|
* to be very smart.
|
|
*/
|
|
probe_block = 0;
|
|
page_no = 0;
|
|
last_block = i_size_read(inode) >> blkbits;
|
|
while ((probe_block + blocks_per_page) <= last_block &&
|
|
page_no < sis->max) {
|
|
unsigned block_in_page;
|
|
sector_t first_block;
|
|
|
|
cond_resched();
|
|
|
|
first_block = bmap(inode, probe_block);
|
|
if (first_block == 0)
|
|
goto bad_bmap;
|
|
|
|
/*
|
|
* It must be PAGE_SIZE aligned on-disk
|
|
*/
|
|
if (first_block & (blocks_per_page - 1)) {
|
|
probe_block++;
|
|
goto reprobe;
|
|
}
|
|
|
|
for (block_in_page = 1; block_in_page < blocks_per_page;
|
|
block_in_page++) {
|
|
sector_t block;
|
|
|
|
block = bmap(inode, probe_block + block_in_page);
|
|
if (block == 0)
|
|
goto bad_bmap;
|
|
if (block != first_block + block_in_page) {
|
|
/* Discontiguity */
|
|
probe_block++;
|
|
goto reprobe;
|
|
}
|
|
}
|
|
|
|
first_block >>= (PAGE_SHIFT - blkbits);
|
|
if (page_no) { /* exclude the header page */
|
|
if (first_block < lowest_block)
|
|
lowest_block = first_block;
|
|
if (first_block > highest_block)
|
|
highest_block = first_block;
|
|
}
|
|
|
|
/*
|
|
* We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
|
|
*/
|
|
ret = add_swap_extent(sis, page_no, 1, first_block);
|
|
if (ret < 0)
|
|
goto out;
|
|
nr_extents += ret;
|
|
page_no++;
|
|
probe_block += blocks_per_page;
|
|
reprobe:
|
|
continue;
|
|
}
|
|
ret = nr_extents;
|
|
*span = 1 + highest_block - lowest_block;
|
|
if (page_no == 0)
|
|
page_no = 1; /* force Empty message */
|
|
sis->max = page_no;
|
|
sis->pages = page_no - 1;
|
|
sis->highest_bit = page_no - 1;
|
|
out:
|
|
return ret;
|
|
bad_bmap:
|
|
pr_err("swapon: swapfile has holes\n");
|
|
ret = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* We may have stale swap cache pages in memory: notice
|
|
* them here and get rid of the unnecessary final write.
|
|
*/
|
|
int swap_writepage(struct page *page, struct writeback_control *wbc)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (try_to_free_swap(page)) {
|
|
unlock_page(page);
|
|
goto out;
|
|
}
|
|
if (frontswap_store(page) == 0) {
|
|
set_page_writeback(page);
|
|
unlock_page(page);
|
|
end_page_writeback(page);
|
|
goto out;
|
|
}
|
|
ret = __swap_writepage(page, wbc, end_swap_bio_write);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static sector_t swap_page_sector(struct page *page)
|
|
{
|
|
return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
|
|
}
|
|
|
|
static inline void count_swpout_vm_event(struct page *page)
|
|
{
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
if (unlikely(PageTransHuge(page)))
|
|
count_vm_event(THP_SWPOUT);
|
|
#endif
|
|
count_vm_events(PSWPOUT, hpage_nr_pages(page));
|
|
}
|
|
|
|
int __swap_writepage(struct page *page, struct writeback_control *wbc,
|
|
bio_end_io_t end_write_func)
|
|
{
|
|
struct bio *bio;
|
|
int ret;
|
|
struct swap_info_struct *sis = page_swap_info(page);
|
|
|
|
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
|
|
if (sis->flags & SWP_FILE) {
|
|
struct kiocb kiocb;
|
|
struct file *swap_file = sis->swap_file;
|
|
struct address_space *mapping = swap_file->f_mapping;
|
|
struct bio_vec bv = {
|
|
.bv_page = page,
|
|
.bv_len = PAGE_SIZE,
|
|
.bv_offset = 0
|
|
};
|
|
struct iov_iter from;
|
|
|
|
iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
|
|
init_sync_kiocb(&kiocb, swap_file);
|
|
kiocb.ki_pos = page_file_offset(page);
|
|
|
|
set_page_writeback(page);
|
|
unlock_page(page);
|
|
ret = mapping->a_ops->direct_IO(&kiocb, &from);
|
|
if (ret == PAGE_SIZE) {
|
|
count_vm_event(PSWPOUT);
|
|
ret = 0;
|
|
} else {
|
|
/*
|
|
* In the case of swap-over-nfs, this can be a
|
|
* temporary failure if the system has limited
|
|
* memory for allocating transmit buffers.
|
|
* Mark the page dirty and avoid
|
|
* rotate_reclaimable_page but rate-limit the
|
|
* messages but do not flag PageError like
|
|
* the normal direct-to-bio case as it could
|
|
* be temporary.
|
|
*/
|
|
set_page_dirty(page);
|
|
ClearPageReclaim(page);
|
|
pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
|
|
page_file_offset(page));
|
|
}
|
|
end_page_writeback(page);
|
|
return ret;
|
|
}
|
|
|
|
ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
|
|
if (!ret) {
|
|
count_swpout_vm_event(page);
|
|
return 0;
|
|
}
|
|
|
|
ret = 0;
|
|
bio = get_swap_bio(GFP_NOIO, page, end_write_func);
|
|
if (bio == NULL) {
|
|
set_page_dirty(page);
|
|
unlock_page(page);
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
|
|
bio_associate_blkcg_from_page(bio, page);
|
|
count_swpout_vm_event(page);
|
|
set_page_writeback(page);
|
|
unlock_page(page);
|
|
submit_bio(bio);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Read a page in from swap.
 *
 * @page:        locked, not-uptodate page to fill; it must be in the swap
 *               cache unless @synchronous is set (enforced by the
 *               VM_BUG_ON_PAGE checks below)
 * @synchronous: when true, poll the queue with blk_poll() after submitting
 *               the bio until the completion handler has cleared
 *               bio->bi_private or blk_poll() returns 0
 *
 * Sources are tried in order: frontswap, the swap file's ->readpage for
 * SWP_FILE areas, bdev_read_page(), and finally a bio.
 *
 * Returns 0 on success, -ENOMEM when no bio could be allocated (the page is
 * unlocked), or the ->readpage result on the SWP_FILE path.
 */
int swap_readpage(struct page *page, bool synchronous)
{
	struct swap_info_struct *sis = page_swap_info(page);
	struct gendisk *disk;
	struct bio *bio;
	blk_qc_t cookie;
	int err;

	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageUptodate(page), page);

	if (frontswap_load(page) == 0) {
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	if (sis->flags & SWP_FILE) {
		struct file *swap_file = sis->swap_file;

		err = swap_file->f_mapping->a_ops->readpage(swap_file, page);
		if (!err)
			count_vm_event(PSWPIN);
		return err;
	}

	if (!bdev_read_page(sis->bdev, swap_page_sector(page), page)) {
		if (trylock_page(page)) {
			swap_slot_free_notify(page);
			unlock_page(page);
		}

		count_vm_event(PSWPIN);
		return 0;
	}

	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
	if (!bio) {
		unlock_page(page);
		return -ENOMEM;
	}
	disk = bio->bi_disk;

	/*
	 * Keep this task valid during swap readpage because the oom killer may
	 * attempt to access it in the page fault retry time check.
	 */
	get_task_struct(current);
	bio->bi_private = current;
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	count_vm_event(PSWPIN);

	/*
	 * Take an extra reference: end_swap_bio_read() drops one, and the
	 * bio is still examined in the loop below.
	 */
	bio_get(bio);
	cookie = submit_bio(bio);
	while (synchronous) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio->bi_private) ||
		    !blk_poll(disk->queue, cookie))
			break;
	}
	__set_current_state(TASK_RUNNING);
	bio_put(bio);

	return 0;
}
|
|
|
|
int swap_set_page_dirty(struct page *page)
|
|
{
|
|
struct swap_info_struct *sis = page_swap_info(page);
|
|
|
|
if (sis->flags & SWP_FILE) {
|
|
struct address_space *mapping = sis->swap_file->f_mapping;
|
|
|
|
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
|
|
return mapping->a_ops->set_page_dirty(page);
|
|
} else {
|
|
return __set_page_dirty_no_writeback(page);
|
|
}
|
|
}
|