kernel-fxtec-pro1x/mm/page_io.c
Ivaylo Georgiev 5fd5fbe20d Merge android-4.19-q.87 (ead6fb7) into msm-4.19
* refs/heads/tmp-ead6fb7:
  Revert "spi: uniphier: fix incorrect property items"
  Linux 4.19.87
  PM / devfreq: Fix kernel oops on governor module load
  KVM: PPC: Book3S HV: Flush link stack on guest exit to host kernel
  powerpc/book3s64: Fix link stack flush on context switch
  powerpc/64s: support nospectre_v2 cmdline option
  staging: comedi: usbduxfast: usbduxfast_ai_cmdtest rounding error
  USB: serial: option: add support for Foxconn T77W968 LTE modules
  USB: serial: option: add support for DW5821e with eSIM support
  USB: serial: mos7840: fix remote wakeup
  USB: serial: mos7720: fix remote wakeup
  USB: serial: mos7840: add USB ID to support Moxa UPort 2210
  appledisplay: fix error handling in the scheduled work
  USB: chaoskey: fix error case of a timeout
  usb-serial: cp201x: support Mark-10 digital force gauge
  usbip: Fix uninitialized symbol 'nents' in stub_recv_cmd_submit()
  usbip: tools: fix fd leakage in the function of read_attr_usbip_status
  USBIP: add config dependency for SGL_ALLOC
  virtio_ring: fix return code on DMA mapping fails
  media: imon: invalid dereference in imon_touch_event
  media: cxusb: detect cxusb_ctrl_msg error in query
  media: b2c2-flexcop-usb: add sanity checking
  media: uvcvideo: Fix error path in control parsing failure
  cpufreq: Add NULL checks to show() and store() methods of cpufreq
  media: usbvision: Fix races among open, close, and disconnect
  media: vivid: Fix wrong locking that causes race conditions on streaming stop
  media: vivid: Set vid_cap_streaming and vid_out_streaming to true
  nfc: port100: handle command failure cleanly
  ALSA: usb-audio: Fix NULL dereference at parsing BADD
  futex: Prevent robust futex exit race
  y2038: futex: Move compat implementation into futex.c
  nbd: prevent memory leak
  x86/speculation: Fix redundant MDS mitigation message
  x86/speculation: Fix incorrect MDS/TAA mitigation status
  x86/insn: Fix awk regexp warnings
  ARC: perf: Accommodate big-endian CPU
  ARM: 8904/1: skip nomap memblocks while finding the lowmem/highmem boundary
  ocfs2: remove ocfs2_is_o2cb_active()
  net: phy: dp83867: increase SGMII autoneg timer duration
  net: phy: dp83867: fix speed 10 in sgmii mode
  mm/memory_hotplug: don't access uninitialized memmaps in shrink_zone_span()
  md/raid10: prevent access of uninitialized resync_pages offset
  ath9k_hw: fix uninitialized variable data
  ath10k: Fix a NULL-ptr-deref bug in ath10k_usb_alloc_urb_from_pipe
  KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved
  Bluetooth: Fix invalid-free in bcsp_close()
  mm/page_io.c: do not free shared swap slots
  cfg80211: call disconnect_wk when AP stops
  ipv6: Fix handling of LLA with VRF and sockets bound to VRF
  mm/memory_hotplug: Do not unlock when fails to take the device_hotplug_lock
  i2c: uniphier-f: fix timeout error after reading 8 bytes
  spi: omap2-mcspi: Fix DMA and FIFO event trigger size mismatch
  nvme-pci: fix surprise removal
  PCI: keystone: Use quirk to limit MRRS for K2G
  pinctrl: zynq: Use define directive for PIN_CONFIG_IO_STANDARD
  pinctrl: lpc18xx: Use define directive for PIN_CONFIG_GPIO_PIN_INT
  pinctrl: bcm2835: Use define directive for BCM2835_PINCONF_PARAM_PULL
  pinctrl: qcom: spmi-gpio: fix gpio-hog related boot issues
  cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces
  tools: bpftool: pass an argument to silence open_obj_pinned()
  of: unittest: initialize args before calling of_*parse_*()
  of: unittest: allow base devicetree to have symbol metadata
  net: bcmgenet: return correct value 'ret' from bcmgenet_power_down
  ACPICA: Use %d for signed int print formatting instead of %u
  clk: tegra20: Turn EMC clock gate into divider
  vrf: mark skb for multicast or link-local as enslaved to VRF
  dlm: don't leak kernel pointer to userspace
  dlm: fix invalid free
  usb: typec: tcpm: charge current handling for sink during hard reset
  scsi: lpfc: Correct loss of fc4 type on remote port address change
  scsi: lpfc: Fix odd recovery in duplicate FLOGIs in point-to-point
  scsi: lpfc: fcoe: Fix link down issue after 1000+ link bounces
  scsi: megaraid_sas: Fix goto labels in error handling
  scsi: megaraid_sas: Fix msleep granularity
  scsi: mpt3sas: Fix driver modifying persistent data in Manufacturing page11
  scsi: mpt3sas: Don't modify EEDPTagMode field setting on SAS3.5 HBA devices
  scsi: mpt3sas: Fix Sync cache command failure during driver unload
  net: dsa: bcm_sf2: Turn on PHY to allow successful registration
  rtlwifi: rtl8192de: Fix misleading REG_MCUFWDL information
  wireless: airo: potential buffer overflow in sprintf()
  brcmsmac: never log "tid x is not agg'able" by default
  rtl8xxxu: Fix missing break in switch
  wlcore: Fix the return value in case of error in 'wlcore_vendor_cmd_smart_config_start()'
  ath10k: snoc: fix unbalanced clock error handling
  wil6210: fix locking in wmi_call
  wil6210: fix RGF_CAF_ICR address for Talyn-MB
  wil6210: fix L2 RX status handling
  wil6210: fix debugfs memory access alignment
  btrfs: avoid link error with CONFIG_NO_AUTO_INLINE
  media: ov13858: Check for possible null pointer
  nds32: Fix bug in bitfield.h
  net: bpfilter: fix iptables failure if bpfilter_umh is disabled
  sock_diag: fix autoloading of the raw_diag module
  audit: print empty EXECVE args
  soc: bcm: brcmstb: Fix re-entry point with a THUMB2_KERNEL
  clk: sunxi-ng: enable so-said LDOs for A64 SoC's pll-mipi clock
  ARM: dts: imx6sx-sdb: Fix enet phy regulator
  openvswitch: fix linking without CONFIG_NF_CONNTRACK_LABELS
  sched/fair: Don't increase sd->balance_interval on newidle balance
  sched/topology: Fix off by one bug
  net: do not abort bulk send on BQL status
  ocfs2: fix clusters leak in ocfs2_defrag_extent()
  ocfs2: don't put and assigning null to bh allocated outside
  ocfs2: don't use iocb when EIOCBQUEUED returns
  ocfs2: without quota support, avoid calling quota recovery
  mm: handle no memcg case in memcg_kmem_charge() properly
  tools/power turbosat: fix AMD APIC-id output
  arm64: makefile fix build of .i file in external module case
  nvme-pci: fix conflicting p2p resource adds
  irq/matrix: Fix memory overallocation
  ntb: intel: fix return value for ndev_vec_mask()
  ntb_netdev: fix sleep time mismatch
  net: hns3: bugfix for hclge_mdio_write and hclge_mdio_read
  net: hns3: bugfix for is_valid_csq_clean_head()
  net: hns3: bugfix for reporting unknown vector0 interrupt repeatly problem
  net: hns3: bugfix for buffer not free problem during resetting
  fm10k: ensure completer aborts are marked as non-fatal after a resume
  igb: shorten maximum PHC timecounter update interval
  powerpc/powernv: hold device_hotplug_lock when calling device_online()
  mm/memory_hotplug: fix online/offline_pages called w.o. mem_hotplug_lock
  mm/memory_hotplug: make add_memory() take the device_hotplug_lock
  kernel/panic.c: do not append newline to the stack protector panic string
  fs/hfs/extent.c: fix array out of bounds read of array extent
  hfs: update timestamp on truncate()
  hfsplus: update timestamps on truncate()
  hfs: fix return value of hfs_get_block()
  hfsplus: fix return value of hfsplus_get_block()
  hfs: prevent btree data loss on ENOSPC
  hfsplus: prevent btree data loss on ENOSPC
  hfs: fix BUG on bnode parent update
  hfsplus: fix BUG on bnode parent update
  lib/bitmap.c: fix remaining space computation in bitmap_print_to_pagebuf
  linux/bitmap.h: fix type of nbits in bitmap_shift_right()
  linux/bitmap.h: handle constant zero-size bitmaps correctly
  mm/gup_benchmark.c: prevent integer overflow in ioctl
  block: call rq_qos_exit() after queue is frozen
  selftests/powerpc/cache_shape: Fix out-of-tree build
  selftests/powerpc/switch_endian: Fix out-of-tree build
  selftests/powerpc/signal: Fix out-of-tree build
  selftests/powerpc/ptrace: Fix out-of-tree build
  powerpc/xmon: Relax frame size for clang
  ipv4/igmp: fix v1/v2 switchback timeout based on rfc3376, 8.12
  vfs: avoid problematic remapping requests into partial EOF block
  um: Make line/tty semantics use true write IRQ
  i2c: uniphier-f: fix race condition when IRQ is cleared
  i2c: uniphier-f: fix occasional timeout error
  i2c: uniphier-f: make driver robust against concurrency
  block: fix the DISCARD request merge
  macsec: let the administrator set UP state even if lowerdev is down
  macsec: update operstate when lower device changes
  mm: thp: fix MADV_DONTNEED vs migrate_misplaced_transhuge_page race condition
  tools/testing/selftests/vm/gup_benchmark.c: fix 'write' flag usage
  mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock
  fs/ocfs2/dlm/dlmdebug.c: fix a sleep-in-atomic-context bug in dlm_print_one_mle()
  arm64: lib: use C string functions with KASAN enabled
  sparc64: Rework xchg() definition to avoid warnings.
  powerpc/process: Fix flush_all_to_thread for SPE
  bpf, btf: fix a missing check bug in btf_parse
  bpf: devmap: fix wrong interface selection in notifier_call
  net: ethernet: cadence: fix socket buffer corruption problem
  thermal: rcar_thermal: Prevent hardware access during system suspend
  thermal: rcar_thermal: fix duplicate IRQ request
  selftests: fix warning: "_GNU_SOURCE" redefined
  selftests: kvm: Fix -Wformat warnings
  selftests: watchdog: Fix error message.
  selftests: watchdog: fix message when /dev/watchdog open fails
  selftests/ftrace: Fix to test kprobe $comm arg only if available
  spi: uniphier: fix incorrect property items
  fs/cifs: fix uninitialised variable warnings
  net: socionext: Stop PHY before resetting netsec
  mfd: max8997: Enale irq-wakeup unconditionally
  mfd: intel_soc_pmic_bxtwc: Chain power button IRQs as well
  mfd: mc13xxx-core: Fix PMIC shutdown when reading ADC values
  mfd: arizona: Correct calling of runtime_put_sync
  net: ethernet: ti: cpsw: unsync mcast entries while switch promisc mode
  qlcnic: fix a return in qlcnic_dcb_get_capability()
  mISDN: Fix type of switch control variable in ctrl_teimanager
  f2fs: spread f2fs_set_inode_flags()
  f2fs: fix to spread clear_cold_data()
  thermal: armada: fix a test in probe()
  RISC-V: Avoid corrupting the upper 32-bit of phys_addr_t in ioremap
  rtc: s35390a: Change buf's type to u8 in s35390a_init
  ceph: only allow punch hole mode in fallocate
  ceph: fix dentry leak in ceph_readdir_prepopulate
  tools: bpftool: fix completion for "bpftool map update"
  selftests/bpf: fix return value comparison for tests in test_libbpf.sh
  powerpc/64s/radix: Fix radix__flush_tlb_collapsed_pmd double flushing pmd
  powerpc/mm/radix: Fix small page at boundary when splitting
  powerpc/mm/radix: Fix overuse of small pages in splitting logic
  powerpc/mm/radix: Fix off-by-one in split mapping logic
  powerpc/pseries: Export raw per-CPU VPA data via debugfs
  scsi: hisi_sas: Fix NULL pointer dereference
  sparc: Fix parport build warnings.
  x86/intel_rdt: Prevent pseudo-locking from using stale pointers
  spi: omap2-mcspi: Set FIFO DMA trigger level to word length
  swiotlb: do not panic on mapping failures
  s390/perf: Return error when debug_register fails
  atm: zatm: Fix empty body Clang warnings
  sunrpc: safely reallow resvport min/max inversion
  SUNRPC: Fix a compile warning for cmpxchg64()
  selftests/bpf: fix file resource leak in load_kallsyms
  dm raid: avoid bitmap with raid4/5/6 journal device
  sctp: use sk_wmem_queued to check for writable space
  usbip: tools: fix atoi() on non-null terminated string
  USB: misc: appledisplay: fix backlight update_status return code
  PCI: vmd: Detach resources after stopping root bus
  macintosh/windfarm_smu_sat: Fix debug output
  ALSA: i2c/cs8427: Fix int to char conversion
  PM / Domains: Deal with multiple states but no governor in genpd
  ACPI / scan: Create platform device for INT33FE ACPI nodes
  kprobes, x86/ptrace.h: Make regs_get_kernel_stack_nth() not fault on bad stack
  xfs: clear ail delwri queued bufs on unmount of shutdown fs
  xfs: fix use-after-free race in xfs_buf_rele
  net: ena: Fix Kconfig dependency on X86
  net: fix warning in af_unix
  net: dsa: mv88e6xxx: Fix 88E6141/6341 2500mbps SERDES speed
  scsi: zorro_esp: Limit DMA transfers to 65535 bytes
  scsi: dc395x: fix DMA API usage in sg_update_list
  scsi: dc395x: fix dma API usage in srb_done
  ASoC: tegra_sgtl5000: fix device_node refcounting
  clk: at91: audio-pll: fix audio pmc type
  clk: mmp2: fix the clock id for sdh2_clk and sdh3_clk
  PCI: mediatek: Fixup MSI enablement logic by enabling MSI before clocks
  nvme-pci: fix hot removal during error handling
  nvmet-fcloop: suppress a compiler warning
  nvmet: avoid integer overflow in the discard code
  crypto: ccree - avoid implicit enum conversion
  scsi: iscsi_tcp: Explicitly cast param in iscsi_sw_tcp_host_get_param
  scsi: bfa: Avoid implicit enum conversion in bfad_im_post_vendor_event
  scsi: isci: Change sci_controller_start_task's return type to sci_status
  scsi: isci: Use proper enumerated type in atapi_d2h_reg_frame_handler
  clk: tegra: Fixes for MBIST work around
  KVM/x86: Fix invvpid and invept register operand size in 64-bit mode
  KVM: nVMX: move check_vmentry_postreqs() call to nested_vmx_enter_non_root_mode()
  KVM: nVMX: reset cache/shadows when switching loaded VMCS
  nfp: bpf: protect against mis-initializing atomic counters
  scsi: ips: fix missing break in switch
  qed: Align local and global PTT to propagate through the APIs.
  amiflop: clean up on errors during setup
  pwm: lpss: Only set update bit if we are actually changing the settings
  pinctrl: sunxi: Fix a memory leak in 'sunxi_pinctrl_build_state()'
  RDMA/bnxt_re: Avoid resource leak in case the NQ registration fails
  RDMA/bnxt_re: Fix qp async event reporting
  RDMA/bnxt_re: Avoid NULL check after accessing the pointer
  scsi: hisi_sas: Free slot later in slot_complete_vx_hw()
  scsi: hisi_sas: Fix the race between IO completion and timeout for SMP/internal IO
  scsi: hisi_sas: Feed back linkrate(max/min) when re-attached
  m68k: fix command-line parsing when passed from u-boot
  w1: IAD Register is yet readable trough iad sys file. Fix snprintf (%u for unsigned, count for max size).
  misc: mic: fix a DMA pool free failure
  gsmi: Fix bug in append_to_eventlog sysfs handler
  btrfs: handle error of get_old_root
  btrfs: defrag: use btrfs_mod_outstanding_extents in cluster_pages_for_defrag
  PCI: mediatek: Fix class type for MT7622 to PCI_CLASS_BRIDGE_PCI
  mmc: mediatek: fix cannot receive new request when msdc_cmd_is_ready fail
  mmc: mediatek: fill the actual clock for mmc debugfs
  spi: sh-msiof: fix deferred probing
  cdrom: don't attempt to fiddle with cdo->capability
  skd: fixup usage of legacy IO API
  ath10k: allocate small size dma memory in ath10k_pci_diag_write_mem
  ath10k: set probe request oui during driver start
  brcmsmac: AP mode: update beacon when TIM changes
  mt76x0: phy: fix restore phase in mt76x0_phy_recalibrate_after_assoc
  mt76: do not store aggregation sequence number for null-data frames
  EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr()
  powerpc/eeh: Fix use of EEH_PE_KEEP on wrong field
  powerpc/eeh: Fix null deref for devices removed during EEH
  powerpc/boot: Disable vector instructions
  powerpc/boot: Fix opal console in boot wrapper
  powerpc: Fix signedness bug in update_flash_db()
  synclink_gt(): fix compat_ioctl()
  pty: fix compat ioctls
  gfs2: Fix marking bitmaps non-full
  PCI: cadence: Write MSI data with 32bits
  pinctrl: madera: Fix uninitialized variable bug in madera_mux_set_mux
  printk: fix integer overflow in setup_log_buf()
  printk: lock/unlock console only for new logbuf entries
  crypto: testmgr - fix sizeof() on COMP_BUF_SIZE
  ALSA: isight: fix leak of reference to firewire unit in error path of .probe callback
  mwifiex: Fix NL80211_TX_POWER_LIMITED
  drm/i915/userptr: Try to acquire the page lock around set_page_dirty()
  drm/i915/pmu: "Frequency" is reported as accumulated cycles
  drm/amd/powerplay: issue no PPSMC_MSG_GetCurrPkgPwr on unsupported ASICs
  mm/ksm.c: don't WARN if page is still mapped in remove_stable_node()
  Revert "fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()"
  virtio_console: allocate inbufs in add_port() only if it is needed
  nbd:fix memory leak in nbd_get_socket()
  tools: gpio: Correctly add make dependencies for gpio_utils
  gpio: max77620: Fixup debounce delays
  vhost/vsock: split packets to send using multiple buffers
  net/mlx5: Fix auto group size calculation
  net/mlxfw: Verify FSM error code translation doesn't exceed array size
  net/mlx5e: Fix set vf link state error flow
  sfc: Only cancel the PPS workqueue if it exists
  net: sched: ensure opts_len <= IP_TUNNEL_OPTS_MAX in act_tunnel_key
  net/sched: act_pedit: fix WARN() in the traffic path
  net: rtnetlink: prevent underflows in do_setvfinfo()
  net/mlx4_en: Fix wrong limitation for number of TX rings
  net/mlx4_en: fix mlx4 ethtool -N insertion
  mlxsw: spectrum_router: Fix determining underlay for a GRE tunnel

Conflicts:
	block/blk-merge.c
	drivers/net/wireless/ath/wil6210/main.c
	drivers/pinctrl/qcom/pinctrl-spmi-gpio.c

Change-Id: I2055f0bc1eb4ac6b7ade99e91f84bf2e4f4ea7c4
Signed-off-by: Ivaylo Georgiev <irgeorgiev@codeaurora.org>
2020-01-28 03:13:32 -08:00

436 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/page_io.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95,
* Asynchronous swapping added 30.12.95. Stephen Tweedie
* Removed race in async swapping. 14.4.1996. Bruno Haible
* Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
* Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
*/
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags,
struct page *page, bio_end_io_t end_io)
{
int i, nr = hpage_nr_pages(page);
struct bio *bio;
bio = bio_alloc(gfp_flags, nr);
if (bio) {
struct block_device *bdev;
bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
bio->bi_end_io = end_io;
for (i = 0; i < nr; i++)
bio_add_page(bio, page + i, PAGE_SIZE, 0);
VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
}
return bio;
}
void end_swap_bio_write(struct bio *bio)
{
struct page *page = bio_first_page_all(bio);
if (bio->bi_status) {
SetPageError(page);
/*
* We failed to write the page out to swap-space.
* Re-dirty the page in order to avoid it being reclaimed.
* Also print a dire warning that things will go BAD (tm)
* very quickly.
*
* Also clear PG_reclaim to avoid rotate_reclaimable_page()
*/
set_page_dirty(page);
pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
MAJOR(bio_dev(bio)),
MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
ClearPageReclaim(page);
}
end_page_writeback(page);
bio_put(bio);
}
static void swap_slot_free_notify(struct page *page)
{
struct swap_info_struct *sis;
struct gendisk *disk;
swp_entry_t entry;
/*
* There is no guarantee that the page is in swap cache - the software
* suspend code (at least) uses end_swap_bio_read() against a non-
* swapcache page. So we must check PG_swapcache before proceeding with
* this optimization.
*/
if (unlikely(!PageSwapCache(page)))
return;
sis = page_swap_info(page);
if (!(sis->flags & SWP_BLKDEV))
return;
/*
* The swap subsystem performs lazy swap slot freeing,
* expecting that the page will be swapped out again.
* So we can avoid an unnecessary write if the page
* isn't redirtied.
* This is good for real swap storage because we can
* reduce unnecessary I/O and enhance wear-leveling
* if an SSD is used as the as swap device.
* But if in-memory swap device (eg zram) is used,
* this causes a duplicated copy between uncompressed
* data in VM-owned memory and compressed data in
* zram-owned memory. So let's free zram-owned memory
* and make the VM-owned decompressed page *dirty*,
* so the page should be swapped out somewhere again if
* we again wish to reclaim it.
*/
disk = sis->bdev->bd_disk;
entry.val = page_private(page);
if (disk->fops->swap_slot_free_notify &&
__swap_count(sis, entry) == 1) {
unsigned long offset;
offset = swp_offset(entry);
SetPageDirty(page);
disk->fops->swap_slot_free_notify(sis->bdev,
offset);
}
}
static void end_swap_bio_read(struct bio *bio)
{
struct page *page = bio_first_page_all(bio);
struct task_struct *waiter = bio->bi_private;
if (bio->bi_status) {
SetPageError(page);
ClearPageUptodate(page);
pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
goto out;
}
SetPageUptodate(page);
swap_slot_free_notify(page);
out:
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
bio_put(bio);
wake_up_process(waiter);
put_task_struct(waiter);
}
int generic_swapfile_activate(struct swap_info_struct *sis,
struct file *swap_file,
sector_t *span)
{
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
unsigned blocks_per_page;
unsigned long page_no;
unsigned blkbits;
sector_t probe_block;
sector_t last_block;
sector_t lowest_block = -1;
sector_t highest_block = 0;
int nr_extents = 0;
int ret;
blkbits = inode->i_blkbits;
blocks_per_page = PAGE_SIZE >> blkbits;
/*
* Map all the blocks into the extent list. This code doesn't try
* to be very smart.
*/
probe_block = 0;
page_no = 0;
last_block = i_size_read(inode) >> blkbits;
while ((probe_block + blocks_per_page) <= last_block &&
page_no < sis->max) {
unsigned block_in_page;
sector_t first_block;
cond_resched();
first_block = bmap(inode, probe_block);
if (first_block == 0)
goto bad_bmap;
/*
* It must be PAGE_SIZE aligned on-disk
*/
if (first_block & (blocks_per_page - 1)) {
probe_block++;
goto reprobe;
}
for (block_in_page = 1; block_in_page < blocks_per_page;
block_in_page++) {
sector_t block;
block = bmap(inode, probe_block + block_in_page);
if (block == 0)
goto bad_bmap;
if (block != first_block + block_in_page) {
/* Discontiguity */
probe_block++;
goto reprobe;
}
}
first_block >>= (PAGE_SHIFT - blkbits);
if (page_no) { /* exclude the header page */
if (first_block < lowest_block)
lowest_block = first_block;
if (first_block > highest_block)
highest_block = first_block;
}
/*
* We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
*/
ret = add_swap_extent(sis, page_no, 1, first_block);
if (ret < 0)
goto out;
nr_extents += ret;
page_no++;
probe_block += blocks_per_page;
reprobe:
continue;
}
ret = nr_extents;
*span = 1 + highest_block - lowest_block;
if (page_no == 0)
page_no = 1; /* force Empty message */
sis->max = page_no;
sis->pages = page_no - 1;
sis->highest_bit = page_no - 1;
out:
return ret;
bad_bmap:
pr_err("swapon: swapfile has holes\n");
ret = -EINVAL;
goto out;
}
/*
* We may have stale swap cache pages in memory: notice
* them here and get rid of the unnecessary final write.
*/
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
int ret = 0;
if (try_to_free_swap(page)) {
unlock_page(page);
goto out;
}
if (frontswap_store(page) == 0) {
set_page_writeback(page);
unlock_page(page);
end_page_writeback(page);
goto out;
}
ret = __swap_writepage(page, wbc, end_swap_bio_write);
out:
return ret;
}
static sector_t swap_page_sector(struct page *page)
{
return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
}
static inline void count_swpout_vm_event(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (unlikely(PageTransHuge(page)))
count_vm_event(THP_SWPOUT);
#endif
count_vm_events(PSWPOUT, hpage_nr_pages(page));
}
int __swap_writepage(struct page *page, struct writeback_control *wbc,
bio_end_io_t end_write_func)
{
struct bio *bio;
int ret;
struct swap_info_struct *sis = page_swap_info(page);
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
if (sis->flags & SWP_FILE) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
struct bio_vec bv = {
.bv_page = page,
.bv_len = PAGE_SIZE,
.bv_offset = 0
};
struct iov_iter from;
iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
init_sync_kiocb(&kiocb, swap_file);
kiocb.ki_pos = page_file_offset(page);
set_page_writeback(page);
unlock_page(page);
ret = mapping->a_ops->direct_IO(&kiocb, &from);
if (ret == PAGE_SIZE) {
count_vm_event(PSWPOUT);
ret = 0;
} else {
/*
* In the case of swap-over-nfs, this can be a
* temporary failure if the system has limited
* memory for allocating transmit buffers.
* Mark the page dirty and avoid
* rotate_reclaimable_page but rate-limit the
* messages but do not flag PageError like
* the normal direct-to-bio case as it could
* be temporary.
*/
set_page_dirty(page);
ClearPageReclaim(page);
pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
page_file_offset(page));
}
end_page_writeback(page);
return ret;
}
ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
if (!ret) {
count_swpout_vm_event(page);
return 0;
}
ret = 0;
bio = get_swap_bio(GFP_NOIO, page, end_write_func);
if (bio == NULL) {
set_page_dirty(page);
unlock_page(page);
ret = -ENOMEM;
goto out;
}
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
bio_associate_blkcg_from_page(bio, page);
count_swpout_vm_event(page);
set_page_writeback(page);
unlock_page(page);
submit_bio(bio);
out:
return ret;
}
int swap_readpage(struct page *page, bool synchronous)
{
struct bio *bio;
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
blk_qc_t qc;
struct gendisk *disk;
VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) {
SetPageUptodate(page);
unlock_page(page);
goto out;
}
if (sis->flags & SWP_FILE) {
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
ret = mapping->a_ops->readpage(swap_file, page);
if (!ret)
count_vm_event(PSWPIN);
return ret;
}
ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
if (!ret) {
if (trylock_page(page)) {
swap_slot_free_notify(page);
unlock_page(page);
}
count_vm_event(PSWPIN);
return 0;
}
ret = 0;
bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
if (bio == NULL) {
unlock_page(page);
ret = -ENOMEM;
goto out;
}
disk = bio->bi_disk;
/*
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
*/
get_task_struct(current);
bio->bi_private = current;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
count_vm_event(PSWPIN);
bio_get(bio);
qc = submit_bio(bio);
while (synchronous) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(bio->bi_private))
break;
if (!blk_poll(disk->queue, qc))
break;
}
__set_current_state(TASK_RUNNING);
bio_put(bio);
out:
return ret;
}
int swap_set_page_dirty(struct page *page)
{
struct swap_info_struct *sis = page_swap_info(page);
if (sis->flags & SWP_FILE) {
struct address_space *mapping = sis->swap_file->f_mapping;
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return mapping->a_ops->set_page_dirty(page);
} else {
return __set_page_dirty_no_writeback(page);
}
}