4d01d462e6
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl7wXf4ACgkQONu9yGCS aT6rDBAAg6jIYJhhb9lpK59hpMxNsaFnPfXdA3Z6qARqH7iIQa9TTP9JF5eFndS0 +2wV8t/8Nz/3BWq9NQAF525QJdqyY6Ahcj5QQXzIzEZyb/p5fRVCBOUcBP7uaBCu gdORR7OhHI9+7aGLr05Svb7pVWPLi0Mk5vjvthEIkojEOIREGuGlERRZNlL1SN3y cYDBCCJtD2XiuhyZNLNxtwE/2/d/1xuIG7T3VRDS6oBtqfOXdsy5xoU9lpbbmZQg s1i3cjWgxEYjJOJqONwzfUSu9Zj4GUZfLTx3gtXG7iEiuUfEw3ljEvIrqSqtNxB5 aTysoOu4MSdJTALHkA7Szhk2Q8Pecmo+NdKLfgMCxAWwIEbn1X9seea7QC5M5/lr Q1z150M2+Lcs6z9I/vR+vmPh9YKn1yGV4RbTeMXwiQLlWcRh7vh7jN+YJvrpmJSL BGbsRLB02J4i58CLW7n2rgeq5ycO41bJeWdXSSZjJg7KiZMvuD7mnDv1nUoj3Ad0 lFxgfBRYYZzGCe53xLBXKnjua1lxp8rStUK4iotqkXyhaZqHo0J52okDxSqbP4VZ DYMfgyiFDufITd7l7qK5H6OeWQJ2IPtaude0HiMQf00bdOIrIsl+xXCtFHo6kx6z VxwFUAUZWIKZT9ZWo2DNmbbDSRmij3Pqm6ZiakDSPT+kZFqvkBo= =u5pA -----END PGP SIGNATURE----- Merge 4.19.129 into android-4.19-stable Changes in 4.19.129 ipv6: fix IPV6_ADDRFORM operation logic net_failover: fixed rollback in net_failover_open() bridge: Avoid infinite loop when suppressing NS messages with invalid options vxlan: Avoid infinite loop when suppressing NS messages with invalid options tun: correct header offsets in napi frags mode selftests: bpf: fix use of undeclared RET_IF macro make 'user_access_begin()' do 'access_ok()' Fix 'acccess_ok()' on alpha and SH arch/openrisc: Fix issues with access_ok() x86: uaccess: Inhibit speculation past access_ok() in user_access_begin() lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user() btrfs: merge btrfs_find_device and find_device btrfs: Detect unbalanced tree with empty leaf before crashing btree operations crypto: talitos - fix ECB and CBC algs ivsize Input: mms114 - fix handling of mms345l ARM: 8977/1: ptrace: Fix mask for thumb breakpoint hook sched/fair: Don't NUMA balance for kthreads Input: synaptics - add a second working PNP_ID for Lenovo T470s drivers/net/ibmvnic: Update VNIC protocol version reporting powerpc/xive: Clear the page tables for the ESB IO mapping ath9k_htc: Silence undersized packet warnings RDMA/uverbs: Make the event_queue fds return POLLERR when disassociated x86/cpu/amd: Make erratum #1054 a legacy erratum perf probe: Accept the instance number of kretprobe event mm: add kvfree_sensitive() for freeing sensitive data objects aio: fix async fsync creds btrfs: tree-checker: Check level for leaves and nodes x86_64: Fix jiffies ODR violation x86/PCI: Mark Intel C620 MROMs as having non-compliant BARs x86/speculation: Prevent rogue cross-process SSBD shutdown x86/reboot/quirks: Add MacBook6,1 reboot quirk efi/efivars: Add missing kobject_put() in sysfs entry creation error path ALSA: es1688: Add the missed snd_card_free() ALSA: hda/realtek - add a pintbl quirk for several Lenovo machines ALSA: usb-audio: Fix inconsistent card PM state after resume ALSA: usb-audio: Add vendor, product and profile name for HP Thunderbolt Dock ACPI: sysfs: Fix reference count leak in acpi_sysfs_add_hotplug_profile() ACPI: CPPC: Fix reference count leak in acpi_cppc_processor_probe() ACPI: GED: add support for _Exx / _Lxx handler methods ACPI: PM: Avoid using power resources if there are none for D0 cgroup, blkcg: Prepare some symbols for module and !CONFIG_CGROUP usages nilfs2: fix null pointer dereference at nilfs_segctor_do_construct() spi: dw: Fix controller unregister order spi: bcm2835aux: Fix controller unregister order spi: bcm-qspi: when tx/rx buffer is NULL set to 0 PM: runtime: clk: Fix clk_pm_runtime_get() error path crypto: cavium/nitrox - Fix 'nitrox_get_first_device()' when ndevlist is fully iterated ALSA: pcm: disallow linking stream to itself x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned KVM: x86: Fix APIC page invalidation race kvm: x86: Fix L1TF mitigation for shadow MMU KVM: x86/mmu: Consolidate "is MMIO SPTE" code KVM: x86: only do L1TF workaround on affected processors x86/speculation: Change misspelled STIPB to STIBP x86/speculation: Add support for STIBP always-on preferred mode x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS. x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for indirect branches. spi: No need to assign dummy value in spi_unregister_controller() spi: Fix controller unregister order spi: pxa2xx: Fix controller unregister order spi: bcm2835: Fix controller unregister order spi: pxa2xx: Balance runtime PM enable/disable on error spi: pxa2xx: Fix runtime PM ref imbalance on probe error crypto: virtio: Fix use-after-free in virtio_crypto_skcipher_finalize_req() crypto: virtio: Fix src/dst scatterlist calculation in __virtio_crypto_skcipher_do_req() crypto: virtio: Fix dest length calculation in __virtio_crypto_skcipher_do_req() selftests/net: in rxtimestamp getopt_long needs terminating null entry ovl: initialize error in ovl_copy_xattr proc: Use new_inode not new_inode_pseudo video: fbdev: w100fb: Fix a potential double free. KVM: nSVM: fix condition for filtering async PF KVM: nSVM: leave ASID aside in copy_vmcb_control_area KVM: nVMX: Consult only the "basic" exit reason when routing nested exit KVM: MIPS: Define KVM_ENTRYHI_ASID to cpu_asid_mask(&boot_cpu_data) KVM: MIPS: Fix VPN2_MASK definition for variable cpu_vmbits KVM: arm64: Make vcpu_cp1x() work on Big Endian hosts scsi: megaraid_sas: TM command refire leads to controller firmware crash ath9k: Fix use-after-free Read in ath9k_wmi_ctrl_rx ath9k: Fix use-after-free Write in ath9k_htc_rx_msg ath9x: Fix stack-out-of-bounds Write in ath9k_hif_usb_rx_cb ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb Smack: slab-out-of-bounds in vsscanf drm/vkms: Hold gem object while still in-use mm/slub: fix a memory leak in sysfs_slab_add() fat: don't allow to mount if the FAT length == 0 perf: Add cond_resched() to task_function_call() agp/intel: Reinforce the barrier after GTT updates mmc: sdhci-msm: Clear tuning done flag while hs400 tuning ARM: dts: at91: sama5d2_ptc_ek: fix sdmmc0 node description mmc: sdio: Fix potential NULL pointer error in mmc_sdio_init_card() xen/pvcalls-back: test for errors when calling backend_connect() KVM: arm64: Synchronize sysreg state on injecting an AArch32 exception ACPI: GED: use correct trigger type field in _Exx / _Lxx handling drm: bridge: adv7511: Extend list of audio sample rates crypto: ccp -- don't "select" CONFIG_DMADEVICES media: si2157: Better check for running tuner in init objtool: Ignore empty alternatives spi: pxa2xx: Apply CS clk quirk to BXT net: atlantic: make hw_get_regs optional net: ena: fix error returning in ena_com_get_hash_function() efi/libstub/x86: Work around LLVM ELF quirk build regression arm64: cacheflush: Fix KGDB trap detection spi: dw: Zero DMA Tx and Rx configurations on stack arm64: insn: Fix two bugs in encoding 32-bit logical immediates ixgbe: Fix XDP redirect on archs with PAGE_SIZE above 4K MIPS: Loongson: Build ATI Radeon GPU driver as module Bluetooth: Add SCO fallback for invalid LMP parameters error kgdb: Disable WARN_CONSOLE_UNLOCKED for all kgdb kgdb: Prevent infinite recursive entries to the debugger spi: dw: Enable interrupts in accordance with DMA xfer mode clocksource: dw_apb_timer: Make CPU-affiliation being optional clocksource: dw_apb_timer_of: Fix missing clockevent timers btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums ARM: 8978/1: mm: make act_mm() respect THREAD_SIZE batman-adv: Revert "disable ethtool link speed detection when auto negotiation off" mmc: meson-mx-sdio: trigger a soft reset after a timeout or CRC error spi: dw: Fix Rx-only DMA transfers x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit net: vmxnet3: fix possible buffer overflow caused by bad DMA value in vmxnet3_get_rss() staging: android: ion: use vmap instead of vm_map_ram brcmfmac: fix wrong location to get firmware feature tools api fs: Make xxx__mountpoint() more scalable e1000: Distribute switch variables for initialization dt-bindings: display: mediatek: control dpi pins mode to avoid leakage audit: fix a net reference leak in audit_send_reply() media: dvb: return -EREMOTEIO on i2c transfer failure. media: platform: fcp: Set appropriate DMA parameters MIPS: Make sparse_init() using top-down allocation Bluetooth: btbcm: Add 2 missing models to subver tables audit: fix a net reference leak in audit_list_rules_send() netfilter: nft_nat: return EOPNOTSUPP if type or flags are not supported selftests/bpf: Fix memory leak in extract_build_id() net: bcmgenet: set Rx mode before starting netif lib/mpi: Fix 64-bit MIPS build with Clang exit: Move preemption fixup up, move blocking operations down sched/core: Fix illegal RCU from offline CPUs drivers/perf: hisi: Fix typo in events attribute array net: lpc-enet: fix error return code in lpc_mii_init() media: cec: silence shift wrapping warning in __cec_s_log_addrs() net: allwinner: Fix use correct return type for ndo_start_xmit() powerpc/spufs: fix copy_to_user while atomic xfs: clean up the error handling in xfs_swap_extents Crypto/chcr: fix for ccm(aes) failed test MIPS: Truncate link address into 32bit for 32bit kernel mips: cm: Fix an invalid error code of INTVN_*_ERR kgdb: Fix spurious true from in_dbg_master() xfs: reset buffer write failure state on successful completion xfs: fix duplicate verification from xfs_qm_dqflush() platform/x86: intel-vbtn: Use acpi_evaluate_integer() platform/x86: intel-vbtn: Split keymap into buttons and switches parts platform/x86: intel-vbtn: Do not advertise switches to userspace if they are not there platform/x86: intel-vbtn: Also handle tablet-mode switch on "Detachable" and "Portable" chassis-types nvme: refine the Qemu Identify CNS quirk ath10k: Remove msdu from idr when management pkt send fails wcn36xx: Fix error handling path in 'wcn36xx_probe()' net: qed*: Reduce RX and TX default ring count when running inside kdump kernel mt76: avoid rx reorder buffer overflow md: don't flush workqueue unconditionally in md_open veth: Adjust hard_start offset on redirect XDP frames net/mlx5e: IPoIB, Drop multicast packets that this interface sent rtlwifi: Fix a double free in _rtl_usb_tx_urb_setup() mwifiex: Fix memory corruption in dump_station x86/boot: Correct relocation destination on old linkers mips: MAAR: Use more precise address mask mips: Add udelay lpj numbers adjustment crypto: stm32/crc32 - fix ext4 chksum BUG_ON() crypto: stm32/crc32 - fix run-time self test issue. crypto: stm32/crc32 - fix multi-instance x86/mm: Stop printing BRK addresses m68k: mac: Don't call via_flush_cache() on Mac IIfx btrfs: qgroup: mark qgroup inconsistent if we're inherting snapshot to a new qgroup macvlan: Skip loopback packets in RX handler PCI: Don't disable decoding when mmio_always_on is set MIPS: Fix IRQ tracing when call handle_fpe() and handle_msa_fpe() bcache: fix refcount underflow in bcache_device_free() mmc: sdhci-msm: Set SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 quirk staging: greybus: sdio: Respect the cmd->busy_timeout from the mmc core mmc: via-sdmmc: Respect the cmd->busy_timeout from the mmc core ixgbe: fix signed-integer-overflow warning mmc: sdhci-esdhc-imx: fix the mask for tuning start point spi: dw: Return any value retrieved from the dma_transfer callback cpuidle: Fix three reference count leaks platform/x86: hp-wmi: Convert simple_strtoul() to kstrtou32() platform/x86: intel-hid: Add a quirk to support HP Spectre X2 (2015) platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type string.h: fix incompatibility between FORTIFY_SOURCE and KASAN btrfs: include non-missing as a qualifier for the latest_bdev btrfs: send: emit file capabilities after chown mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked() mm: initialize deferred pages with interrupts enabled ima: Fix ima digest hash table key calculation ima: Directly assign the ima_default_policy pointer to ima_rules evm: Fix possible memory leak in evm_calc_hmac_or_hash() ext4: fix EXT_MAX_EXTENT/INDEX to check for zeroed eh_max ext4: fix error pointer dereference ext4: fix race between ext4_sync_parent() and rename() PCI: Avoid Pericom USB controller OHCI/EHCI PME# defect PCI: Avoid FLR for AMD Matisse HD Audio & USB 3.0 PCI: Avoid FLR for AMD Starship USB 3.0 PCI: Add ACS quirk for iProc PAXB PCI: Add ACS quirk for Intel Root Complex Integrated Endpoints PCI: Remove unused NFP32xx IDs pci:ipmi: Move IPMI PCI class id defines to pci_ids.h hwmon/k10temp, x86/amd_nb: Consolidate shared device IDs x86/amd_nb: Add PCI device IDs for family 17h, model 30h PCI: add USR vendor id and use it in r8169 and w6692 driver PCI: Move Synopsys HAPS platform device IDs PCI: Move Rohm Vendor ID to generic list misc: pci_endpoint_test: Add the layerscape EP device support misc: pci_endpoint_test: Add support to test PCI EP in AM654x PCI: Add Synopsys endpoint EDDA Device ID PCI: Add NVIDIA GPU multi-function power dependencies PCI: Enable NVIDIA HDA controllers PCI: mediatek: Add controller support for MT7629 x86/amd_nb: Add PCI device IDs for family 17h, model 70h ALSA: lx6464es - add support for LX6464ESe pci express variant PCI: Add Genesys Logic, Inc. Vendor ID PCI: Add Amazon's Annapurna Labs vendor ID PCI: vmd: Add device id for VMD device 8086:9A0B x86/amd_nb: Add Family 19h PCI IDs PCI: Add Loongson vendor ID serial: 8250_pci: Move Pericom IDs to pci_ids.h PCI: Make ACS quirk implementations more uniform PCI: Unify ACS quirk desired vs provided checking PCI: Generalize multi-function power dependency device links btrfs: fix error handling when submitting direct I/O bio btrfs: fix wrong file range cleanup after an error filling dealloc range ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init() PCI: Program MPS for RCiEP devices e1000e: Disable TSO for buffer overrun workaround e1000e: Relax condition to trigger reset for ME workaround carl9170: remove P2P_GO support media: go7007: fix a miss of snd_card_free Bluetooth: hci_bcm: fix freeing not-requested IRQ b43legacy: Fix case where channel status is corrupted b43: Fix connection problem with WPA3 b43_legacy: Fix connection problem with WPA3 media: ov5640: fix use of destroyed mutex igb: Report speed and duplex as unknown when device is runtime suspended power: vexpress: add suppress_bind_attrs to true pinctrl: samsung: Correct setting of eint wakeup mask on s5pv210 pinctrl: samsung: Save/restore eint_mask over suspend for EINT_TYPE GPIOs gnss: sirf: fix error return code in sirf_probe() sparc32: fix register window handling in genregs32_[gs]et() sparc64: fix misuses of access_process_vm() in genregs32_[sg]et() dm crypt: avoid truncating the logical block size alpha: fix memory barriers so that they conform to the specification kernel/cpu_pm: Fix uninitted local in cpu_pm ARM: tegra: Correct PL310 Auxiliary Control Register initialization ARM: dts: exynos: Fix GPIO polarity for thr GalaxyS3 CM36651 sensor's bus ARM: dts: at91: sama5d2_ptc_ek: fix vbus pin ARM: dts: s5pv210: Set keep-power-in-suspend for SDHCI1 on Aries drivers/macintosh: Fix memleak in windfarm_pm112 driver powerpc/64s: Don't let DT CPU features set FSCR_DSCR powerpc/64s: Save FSCR to init_task.thread.fscr after feature init kbuild: force to build vmlinux if CONFIG_MODVERSION=y sunrpc: svcauth_gss_register_pseudoflavor must reject duplicate registrations. sunrpc: clean up properly in gss_mech_unregister() mtd: rawnand: brcmnand: fix hamming oob layout mtd: rawnand: pasemi: Fix the probe error path w1: omap-hdq: cleanup to add missing newline for some dev_dbg perf probe: Do not show the skipped events perf probe: Fix to check blacklist address correctly perf probe: Check address correctness by map instead of _etext perf symbols: Fix debuginfo search for Ubuntu Linux 4.19.129 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I7b1108d90ee1109a28fe488a4358b7a3e101d9c9
807 lines
20 KiB
C
807 lines
20 KiB
C
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/string.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/export.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/security.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/swapops.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/userfaultfd_k.h>
|
|
|
|
#include <asm/sections.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include "internal.h"
|
|
|
|
static inline int is_kernel_rodata(unsigned long addr)
|
|
{
|
|
return addr >= (unsigned long)__start_rodata &&
|
|
addr < (unsigned long)__end_rodata;
|
|
}
|
|
|
|
/**
|
|
* kfree_const - conditionally free memory
|
|
* @x: pointer to the memory
|
|
*
|
|
* Function calls kfree only if @x is not in .rodata section.
|
|
*/
|
|
void kfree_const(const void *x)
|
|
{
|
|
if (!is_kernel_rodata((unsigned long)x))
|
|
kfree(x);
|
|
}
|
|
EXPORT_SYMBOL(kfree_const);
|
|
|
|
/**
|
|
* kstrdup - allocate space for and copy an existing string
|
|
* @s: the string to duplicate
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
*/
|
|
char *kstrdup(const char *s, gfp_t gfp)
|
|
{
|
|
size_t len;
|
|
char *buf;
|
|
|
|
if (!s)
|
|
return NULL;
|
|
|
|
len = strlen(s) + 1;
|
|
buf = kmalloc_track_caller(len, gfp);
|
|
if (buf)
|
|
memcpy(buf, s, len);
|
|
return buf;
|
|
}
|
|
EXPORT_SYMBOL(kstrdup);
|
|
|
|
/**
|
|
* kstrdup_const - conditionally duplicate an existing const string
|
|
* @s: the string to duplicate
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
*
|
|
* Function returns source string if it is in .rodata section otherwise it
|
|
* fallbacks to kstrdup.
|
|
* Strings allocated by kstrdup_const should be freed by kfree_const.
|
|
*/
|
|
const char *kstrdup_const(const char *s, gfp_t gfp)
|
|
{
|
|
if (is_kernel_rodata((unsigned long)s))
|
|
return s;
|
|
|
|
return kstrdup(s, gfp);
|
|
}
|
|
EXPORT_SYMBOL(kstrdup_const);
|
|
|
|
/**
|
|
* kstrndup - allocate space for and copy an existing string
|
|
* @s: the string to duplicate
|
|
* @max: read at most @max chars from @s
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
*
|
|
* Note: Use kmemdup_nul() instead if the size is known exactly.
|
|
*/
|
|
char *kstrndup(const char *s, size_t max, gfp_t gfp)
|
|
{
|
|
size_t len;
|
|
char *buf;
|
|
|
|
if (!s)
|
|
return NULL;
|
|
|
|
len = strnlen(s, max);
|
|
buf = kmalloc_track_caller(len+1, gfp);
|
|
if (buf) {
|
|
memcpy(buf, s, len);
|
|
buf[len] = '\0';
|
|
}
|
|
return buf;
|
|
}
|
|
EXPORT_SYMBOL(kstrndup);
|
|
|
|
/**
|
|
* kmemdup - duplicate region of memory
|
|
*
|
|
* @src: memory region to duplicate
|
|
* @len: memory region length
|
|
* @gfp: GFP mask to use
|
|
*/
|
|
void *kmemdup(const void *src, size_t len, gfp_t gfp)
|
|
{
|
|
void *p;
|
|
|
|
p = kmalloc_track_caller(len, gfp);
|
|
if (p)
|
|
memcpy(p, src, len);
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(kmemdup);
|
|
|
|
/**
|
|
* kmemdup_nul - Create a NUL-terminated string from unterminated data
|
|
* @s: The data to stringify
|
|
* @len: The size of the data
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
*/
|
|
char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
|
|
{
|
|
char *buf;
|
|
|
|
if (!s)
|
|
return NULL;
|
|
|
|
buf = kmalloc_track_caller(len + 1, gfp);
|
|
if (buf) {
|
|
memcpy(buf, s, len);
|
|
buf[len] = '\0';
|
|
}
|
|
return buf;
|
|
}
|
|
EXPORT_SYMBOL(kmemdup_nul);
|
|
|
|
/**
|
|
* memdup_user - duplicate memory region from user space
|
|
*
|
|
* @src: source address in user space
|
|
* @len: number of bytes to copy
|
|
*
|
|
* Returns an ERR_PTR() on failure. Result is physically
|
|
* contiguous, to be freed by kfree().
|
|
*/
|
|
void *memdup_user(const void __user *src, size_t len)
|
|
{
|
|
void *p;
|
|
|
|
p = kmalloc_track_caller(len, GFP_USER);
|
|
if (!p)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (copy_from_user(p, src, len)) {
|
|
kfree(p);
|
|
return ERR_PTR(-EFAULT);
|
|
}
|
|
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(memdup_user);
|
|
|
|
/**
|
|
* vmemdup_user - duplicate memory region from user space
|
|
*
|
|
* @src: source address in user space
|
|
* @len: number of bytes to copy
|
|
*
|
|
* Returns an ERR_PTR() on failure. Result may be not
|
|
* physically contiguous. Use kvfree() to free.
|
|
*/
|
|
void *vmemdup_user(const void __user *src, size_t len)
|
|
{
|
|
void *p;
|
|
|
|
p = kvmalloc(len, GFP_USER);
|
|
if (!p)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (copy_from_user(p, src, len)) {
|
|
kvfree(p);
|
|
return ERR_PTR(-EFAULT);
|
|
}
|
|
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(vmemdup_user);
|
|
|
|
/**
|
|
* strndup_user - duplicate an existing string from user space
|
|
* @s: The string to duplicate
|
|
* @n: Maximum number of bytes to copy, including the trailing NUL.
|
|
*/
|
|
char *strndup_user(const char __user *s, long n)
|
|
{
|
|
char *p;
|
|
long length;
|
|
|
|
length = strnlen_user(s, n);
|
|
|
|
if (!length)
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
if (length > n)
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
p = memdup_user(s, length);
|
|
|
|
if (IS_ERR(p))
|
|
return p;
|
|
|
|
p[length - 1] = '\0';
|
|
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(strndup_user);
|
|
|
|
/**
|
|
* memdup_user_nul - duplicate memory region from user space and NUL-terminate
|
|
*
|
|
* @src: source address in user space
|
|
* @len: number of bytes to copy
|
|
*
|
|
* Returns an ERR_PTR() on failure.
|
|
*/
|
|
void *memdup_user_nul(const void __user *src, size_t len)
|
|
{
|
|
char *p;
|
|
|
|
/*
|
|
* Always use GFP_KERNEL, since copy_from_user() can sleep and
|
|
* cause pagefault, which makes it pointless to use GFP_NOFS
|
|
* or GFP_ATOMIC.
|
|
*/
|
|
p = kmalloc_track_caller(len + 1, GFP_KERNEL);
|
|
if (!p)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (copy_from_user(p, src, len)) {
|
|
kfree(p);
|
|
return ERR_PTR(-EFAULT);
|
|
}
|
|
p[len] = '\0';
|
|
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(memdup_user_nul);
|
|
|
|
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
struct vm_area_struct *prev, struct rb_node *rb_parent)
|
|
{
|
|
struct vm_area_struct *next;
|
|
|
|
vma->vm_prev = prev;
|
|
if (prev) {
|
|
next = prev->vm_next;
|
|
prev->vm_next = vma;
|
|
} else {
|
|
mm->mmap = vma;
|
|
if (rb_parent)
|
|
next = rb_entry(rb_parent,
|
|
struct vm_area_struct, vm_rb);
|
|
else
|
|
next = NULL;
|
|
}
|
|
vma->vm_next = next;
|
|
if (next)
|
|
next->vm_prev = vma;
|
|
}
|
|
|
|
/* Check if the vma is being used as a stack by this task */
|
|
int vma_is_stack_for_current(struct vm_area_struct *vma)
|
|
{
|
|
struct task_struct * __maybe_unused t = current;
|
|
|
|
return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
|
|
}
|
|
|
|
#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
|
|
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
|
|
{
|
|
mm->mmap_base = TASK_UNMAPPED_BASE;
|
|
mm->get_unmapped_area = arch_get_unmapped_area;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
|
|
* back to the regular GUP.
|
|
* Note a difference with get_user_pages_fast: this always returns the
|
|
* number of pages pinned, 0 if no pages were pinned.
|
|
* If the architecture does not support this function, simply return with no
|
|
* pages pinned.
|
|
*/
|
|
int __weak __get_user_pages_fast(unsigned long start,
|
|
int nr_pages, int write, struct page **pages)
|
|
{
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__get_user_pages_fast);
|
|
|
|
/**
|
|
* get_user_pages_fast() - pin user pages in memory
|
|
* @start: starting user address
|
|
* @nr_pages: number of pages from start to pin
|
|
* @write: whether pages will be written to
|
|
* @pages: array that receives pointers to the pages pinned.
|
|
* Should be at least nr_pages long.
|
|
*
|
|
* Returns number of pages pinned. This may be fewer than the number
|
|
* requested. If nr_pages is 0 or negative, returns 0. If no pages
|
|
* were pinned, returns -errno.
|
|
*
|
|
* get_user_pages_fast provides equivalent functionality to get_user_pages,
|
|
* operating on current and current->mm, with force=0 and vma=NULL. However
|
|
* unlike get_user_pages, it must be called without mmap_sem held.
|
|
*
|
|
* get_user_pages_fast may take mmap_sem and page table locks, so no
|
|
* assumptions can be made about lack of locking. get_user_pages_fast is to be
|
|
* implemented in a way that is advantageous (vs get_user_pages()) when the
|
|
* user memory area is already faulted in and present in ptes. However if the
|
|
* pages have to be faulted in, it may turn out to be slightly slower so
|
|
* callers need to carefully consider what to use. On many architectures,
|
|
* get_user_pages_fast simply falls back to get_user_pages.
|
|
*/
|
|
int __weak get_user_pages_fast(unsigned long start,
|
|
int nr_pages, int write, struct page **pages)
|
|
{
|
|
return get_user_pages_unlocked(start, nr_pages, pages,
|
|
write ? FOLL_WRITE : 0);
|
|
}
|
|
EXPORT_SYMBOL_GPL(get_user_pages_fast);
|
|
|
|
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
|
|
unsigned long len, unsigned long prot,
|
|
unsigned long flag, unsigned long pgoff)
|
|
{
|
|
unsigned long ret;
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long populate;
|
|
LIST_HEAD(uf);
|
|
|
|
ret = security_mmap_file(file, prot, flag);
|
|
if (!ret) {
|
|
if (down_write_killable(&mm->mmap_sem))
|
|
return -EINTR;
|
|
ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
|
|
&populate, &uf);
|
|
up_write(&mm->mmap_sem);
|
|
userfaultfd_unmap_complete(mm, &uf);
|
|
if (populate)
|
|
mm_populate(ret, populate);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
unsigned long vm_mmap(struct file *file, unsigned long addr,
|
|
unsigned long len, unsigned long prot,
|
|
unsigned long flag, unsigned long offset)
|
|
{
|
|
if (unlikely(offset + PAGE_ALIGN(len) < offset))
|
|
return -EINVAL;
|
|
if (unlikely(offset_in_page(offset)))
|
|
return -EINVAL;
|
|
|
|
return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
|
|
}
|
|
EXPORT_SYMBOL(vm_mmap);
|
|
|
|
/**
|
|
* kvmalloc_node - attempt to allocate physically contiguous memory, but upon
|
|
* failure, fall back to non-contiguous (vmalloc) allocation.
|
|
* @size: size of the request.
|
|
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
|
|
* @node: numa node to allocate from
|
|
*
|
|
* Uses kmalloc to get the memory but if the allocation fails then falls back
|
|
* to the vmalloc allocator. Use kvfree for freeing the memory.
|
|
*
|
|
* Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
|
|
* __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
|
|
* preferable to the vmalloc fallback, due to visible performance drawbacks.
|
|
*
|
|
* Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
|
|
* fall back to vmalloc.
|
|
*/
|
|
void *kvmalloc_node(size_t size, gfp_t flags, int node)
|
|
{
|
|
gfp_t kmalloc_flags = flags;
|
|
void *ret;
|
|
|
|
/*
|
|
* vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
|
|
* so the given set of flags has to be compatible.
|
|
*/
|
|
if ((flags & GFP_KERNEL) != GFP_KERNEL)
|
|
return kmalloc_node(size, flags, node);
|
|
|
|
/*
|
|
* We want to attempt a large physically contiguous block first because
|
|
* it is less likely to fragment multiple larger blocks and therefore
|
|
* contribute to a long term fragmentation less than vmalloc fallback.
|
|
* However make sure that larger requests are not too disruptive - no
|
|
* OOM killer and no allocation failure warnings as we have a fallback.
|
|
*/
|
|
if (size > PAGE_SIZE) {
|
|
kmalloc_flags |= __GFP_NOWARN;
|
|
|
|
if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
|
|
kmalloc_flags |= __GFP_NORETRY;
|
|
}
|
|
|
|
ret = kmalloc_node(size, kmalloc_flags, node);
|
|
|
|
/*
|
|
* It doesn't really make sense to fallback to vmalloc for sub page
|
|
* requests
|
|
*/
|
|
if (ret || size <= PAGE_SIZE)
|
|
return ret;
|
|
|
|
return __vmalloc_node_flags_caller(size, node, flags,
|
|
__builtin_return_address(0));
|
|
}
|
|
EXPORT_SYMBOL(kvmalloc_node);
|
|
|
|
/**
|
|
* kvfree() - Free memory.
|
|
* @addr: Pointer to allocated memory.
|
|
*
|
|
* kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
|
|
* It is slightly more efficient to use kfree() or vfree() if you are certain
|
|
* that you know which one to use.
|
|
*
|
|
* Context: Any context except NMI.
|
|
*/
|
|
void kvfree(const void *addr)
|
|
{
|
|
if (is_vmalloc_addr(addr))
|
|
vfree(addr);
|
|
else
|
|
kfree(addr);
|
|
}
|
|
EXPORT_SYMBOL(kvfree);
|
|
|
|
/**
|
|
* kvfree_sensitive - Free a data object containing sensitive information.
|
|
* @addr: address of the data object to be freed.
|
|
* @len: length of the data object.
|
|
*
|
|
* Use the special memzero_explicit() function to clear the content of a
|
|
* kvmalloc'ed object containing sensitive data to make sure that the
|
|
* compiler won't optimize out the data clearing.
|
|
*/
|
|
void kvfree_sensitive(const void *addr, size_t len)
|
|
{
|
|
if (likely(!ZERO_OR_NULL_PTR(addr))) {
|
|
memzero_explicit((void *)addr, len);
|
|
kvfree(addr);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(kvfree_sensitive);
|
|
|
|
static inline void *__page_rmapping(struct page *page)
|
|
{
|
|
unsigned long mapping;
|
|
|
|
mapping = (unsigned long)page->mapping;
|
|
mapping &= ~PAGE_MAPPING_FLAGS;
|
|
|
|
return (void *)mapping;
|
|
}
|
|
|
|
/* Neutral page->mapping pointer to address_space or anon_vma or other */
|
|
void *page_rmapping(struct page *page)
|
|
{
|
|
page = compound_head(page);
|
|
return __page_rmapping(page);
|
|
}
|
|
|
|
/*
|
|
* Return true if this page is mapped into pagetables.
|
|
* For compound page it returns true if any subpage of compound page is mapped.
|
|
*/
|
|
bool page_mapped(struct page *page)
|
|
{
|
|
int i;
|
|
|
|
if (likely(!PageCompound(page)))
|
|
return atomic_read(&page->_mapcount) >= 0;
|
|
page = compound_head(page);
|
|
if (atomic_read(compound_mapcount_ptr(page)) >= 0)
|
|
return true;
|
|
if (PageHuge(page))
|
|
return false;
|
|
for (i = 0; i < (1 << compound_order(page)); i++) {
|
|
if (atomic_read(&page[i]._mapcount) >= 0)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(page_mapped);
|
|
|
|
struct anon_vma *page_anon_vma(struct page *page)
|
|
{
|
|
unsigned long mapping;
|
|
|
|
page = compound_head(page);
|
|
mapping = (unsigned long)page->mapping;
|
|
if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
|
|
return NULL;
|
|
return __page_rmapping(page);
|
|
}
|
|
|
|
struct address_space *page_mapping(struct page *page)
|
|
{
|
|
struct address_space *mapping;
|
|
|
|
page = compound_head(page);
|
|
|
|
/* This happens if someone calls flush_dcache_page on slab page */
|
|
if (unlikely(PageSlab(page)))
|
|
return NULL;
|
|
|
|
if (unlikely(PageSwapCache(page))) {
|
|
swp_entry_t entry;
|
|
|
|
entry.val = page_private(page);
|
|
return swap_address_space(entry);
|
|
}
|
|
|
|
mapping = page->mapping;
|
|
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
|
|
return NULL;
|
|
|
|
return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
|
|
}
|
|
EXPORT_SYMBOL(page_mapping);
|
|
|
|
/*
|
|
* For file cache pages, return the address_space, otherwise return NULL
|
|
*/
|
|
struct address_space *page_mapping_file(struct page *page)
|
|
{
|
|
if (unlikely(PageSwapCache(page)))
|
|
return NULL;
|
|
return page_mapping(page);
|
|
}
|
|
|
|
/* Slow path of page_mapcount() for compound pages */
|
|
int __page_mapcount(struct page *page)
|
|
{
|
|
int ret;
|
|
|
|
ret = atomic_read(&page->_mapcount) + 1;
|
|
/*
|
|
* For file THP page->_mapcount contains total number of mapping
|
|
* of the page: no need to look into compound_mapcount.
|
|
*/
|
|
if (!PageAnon(page) && !PageHuge(page))
|
|
return ret;
|
|
page = compound_head(page);
|
|
ret += atomic_read(compound_mapcount_ptr(page)) + 1;
|
|
if (PageDoubleMap(page))
|
|
ret--;
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__page_mapcount);
|
|
|
|
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
|
|
int sysctl_overcommit_ratio __read_mostly = 50;
|
|
unsigned long sysctl_overcommit_kbytes __read_mostly;
|
|
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
|
|
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
|
|
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
|
|
|
|
int overcommit_ratio_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp,
|
|
loff_t *ppos)
|
|
{
|
|
int ret;
|
|
|
|
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
|
if (ret == 0 && write)
|
|
sysctl_overcommit_kbytes = 0;
|
|
return ret;
|
|
}
|
|
|
|
int overcommit_kbytes_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp,
|
|
loff_t *ppos)
|
|
{
|
|
int ret;
|
|
|
|
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
|
|
if (ret == 0 && write)
|
|
sysctl_overcommit_ratio = 0;
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
|
|
*/
|
|
unsigned long vm_commit_limit(void)
|
|
{
|
|
unsigned long allowed;
|
|
|
|
if (sysctl_overcommit_kbytes)
|
|
allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
|
|
else
|
|
allowed = ((totalram_pages - hugetlb_total_pages())
|
|
* sysctl_overcommit_ratio / 100);
|
|
allowed += total_swap_pages;
|
|
|
|
return allowed;
|
|
}
|
|
|
|
/*
|
|
* Make sure vm_committed_as in one cacheline and not cacheline shared with
|
|
* other variables. It can be updated by several CPUs frequently.
|
|
*/
|
|
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
|
|
|
|
/*
|
|
* The global memory commitment made in the system can be a metric
|
|
* that can be used to drive ballooning decisions when Linux is hosted
|
|
* as a guest. On Hyper-V, the host implements a policy engine for dynamically
|
|
* balancing memory across competing virtual machines that are hosted.
|
|
* Several metrics drive this policy engine including the guest reported
|
|
* memory commitment.
|
|
*/
|
|
unsigned long vm_memory_committed(void)
|
|
{
|
|
return percpu_counter_read_positive(&vm_committed_as);
|
|
}
|
|
EXPORT_SYMBOL_GPL(vm_memory_committed);
|
|
|
|
/*
|
|
* Check that a process has enough memory to allocate a new virtual
|
|
* mapping. 0 means there is enough memory for the allocation to
|
|
* succeed and -ENOMEM implies there is not.
|
|
*
|
|
* We currently support three overcommit policies, which are set via the
|
|
* vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting.rst
|
|
*
|
|
* Strict overcommit modes added 2002 Feb 26 by Alan Cox.
|
|
* Additional code 2002 Jul 20 by Robert Love.
|
|
*
|
|
* cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
|
|
*
|
|
* Note this is a helper function intended to be used by LSMs which
|
|
* wish to use this logic.
|
|
*/
|
|
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
|
|
{
|
|
long free, allowed, reserve;
|
|
|
|
VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
|
|
-(s64)vm_committed_as_batch * num_online_cpus(),
|
|
"memory commitment underflow");
|
|
|
|
vm_acct_memory(pages);
|
|
|
|
/*
|
|
* Sometimes we want to use more memory than we have
|
|
*/
|
|
if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
|
|
return 0;
|
|
|
|
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
|
|
free = global_zone_page_state(NR_FREE_PAGES);
|
|
free += global_node_page_state(NR_FILE_PAGES);
|
|
|
|
/*
|
|
* shmem pages shouldn't be counted as free in this
|
|
* case, they can't be purged, only swapped out, and
|
|
* that won't affect the overall amount of available
|
|
* memory in the system.
|
|
*/
|
|
free -= global_node_page_state(NR_SHMEM);
|
|
|
|
free += get_nr_swap_pages();
|
|
|
|
/*
|
|
* Any slabs which are created with the
|
|
* SLAB_RECLAIM_ACCOUNT flag claim to have contents
|
|
* which are reclaimable, under pressure. The dentry
|
|
* cache and most inode caches should fall into this
|
|
*/
|
|
free += global_node_page_state(NR_SLAB_RECLAIMABLE);
|
|
|
|
/*
|
|
* Part of the kernel memory, which can be released
|
|
* under memory pressure.
|
|
*/
|
|
free += global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
|
|
|
|
/*
|
|
* Leave reserved pages. The pages are not for anonymous pages.
|
|
*/
|
|
if (free <= totalreserve_pages)
|
|
goto error;
|
|
else
|
|
free -= totalreserve_pages;
|
|
|
|
/*
|
|
* Reserve some for root
|
|
*/
|
|
if (!cap_sys_admin)
|
|
free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
|
|
|
|
if (free > pages)
|
|
return 0;
|
|
|
|
goto error;
|
|
}
|
|
|
|
allowed = vm_commit_limit();
|
|
/*
|
|
* Reserve some for root
|
|
*/
|
|
if (!cap_sys_admin)
|
|
allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
|
|
|
|
/*
|
|
* Don't let a single process grow so big a user can't recover
|
|
*/
|
|
if (mm) {
|
|
reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
|
|
allowed -= min_t(long, mm->total_vm / 32, reserve);
|
|
}
|
|
|
|
if (percpu_counter_read_positive(&vm_committed_as) < allowed)
|
|
return 0;
|
|
error:
|
|
vm_unacct_memory(pages);
|
|
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/**
|
|
* get_cmdline() - copy the cmdline value to a buffer.
|
|
* @task: the task whose cmdline value to copy.
|
|
* @buffer: the buffer to copy to.
|
|
* @buflen: the length of the buffer. Larger cmdline values are truncated
|
|
* to this length.
|
|
* Returns the size of the cmdline field copied. Note that the copy does
|
|
* not guarantee an ending NULL byte.
|
|
*/
|
|
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
|
|
{
|
|
int res = 0;
|
|
unsigned int len;
|
|
struct mm_struct *mm = get_task_mm(task);
|
|
unsigned long arg_start, arg_end, env_start, env_end;
|
|
if (!mm)
|
|
goto out;
|
|
if (!mm->arg_end)
|
|
goto out_mm; /* Shh! No looking before we're done */
|
|
|
|
down_read(&mm->mmap_sem);
|
|
arg_start = mm->arg_start;
|
|
arg_end = mm->arg_end;
|
|
env_start = mm->env_start;
|
|
env_end = mm->env_end;
|
|
up_read(&mm->mmap_sem);
|
|
|
|
len = arg_end - arg_start;
|
|
|
|
if (len > buflen)
|
|
len = buflen;
|
|
|
|
res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);
|
|
|
|
/*
|
|
* If the nul at the end of args has been overwritten, then
|
|
* assume application is using setproctitle(3).
|
|
*/
|
|
if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
|
|
len = strnlen(buffer, res);
|
|
if (len < res) {
|
|
res = len;
|
|
} else {
|
|
len = env_end - env_start;
|
|
if (len > buflen - res)
|
|
len = buflen - res;
|
|
res += access_process_vm(task, env_start,
|
|
buffer+res, len,
|
|
FOLL_FORCE);
|
|
res = strnlen(buffer, res);
|
|
}
|
|
}
|
|
out_mm:
|
|
mmput(mm);
|
|
out:
|
|
return res;
|
|
}
|