0cc34620e8
* refs/heads/tmp-204dd19: UPSTREAM: driver core: Avoid deferred probe due to fw_devlink_pause/resume() UPSTREAM: driver core: Rename dev_links_info.defer_sync to defer_hook UPSTREAM: driver core: Don't do deferred probe in parallel with kernel_init thread Restore sdcardfs feature Revert rpmh and usb changes Linux 4.19.136 regmap: debugfs: check count when read regmap file rtnetlink: Fix memory(net_device) leak when ->newlink fails udp: Improve load balancing for SO_REUSEPORT. udp: Copy has_conns in reuseport_grow(). sctp: shrink stream outq when fails to do addstream reconf sctp: shrink stream outq only when new outcnt < old outcnt AX.25: Prevent integer overflows in connect and sendmsg tcp: allow at most one TLP probe per flight rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA qrtr: orphan socket in qrtr_release() net: udp: Fix wrong clean up for IS_UDPLITE macro net-sysfs: add a newline when printing 'tx_timeout' by sysfs ip6_gre: fix null-ptr-deref in ip6gre_init_net() drivers/net/wan/x25_asy: Fix to make it work dev: Defer free of skbs in flush_backlog AX.25: Prevent out-of-bounds read in ax25_sendmsg() AX.25: Fix out-of-bounds read in ax25_connect() Linux 4.19.135 ath9k: Fix regression with Atheros 9271 ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb dm integrity: fix integrity recalculation that is improperly skipped ASoC: qcom: Drop HAS_DMA dependency to fix link failure ASoC: rt5670: Add new gpio1_is_ext_spk_en quirk and enable it on the Lenovo Miix 2 10 x86, vmlinux.lds: Page-align end of ..page_aligned sections parisc: Add atomic64_set_release() define to avoid CPU soft lockups drm/amd/powerplay: fix a crash when overclocking Vega M drm/amdgpu: Fix NULL dereference in dpm sysfs handlers io-mapping: indicate mapping failure mm: memcg/slab: fix memory leak at non-root kmem_cache destroy mm: memcg/slab: synchronize access to kmem_cache dying flag using a spinlock mm/memcg: fix refcount error while moving and swapping 
Makefile: Fix GCC_TOOLCHAIN_DIR prefix for Clang cross compilation vt: Reject zero-sized screen buffer size. fbdev: Detect integer underflow at "struct fbcon_ops"->clear_margins. serial: 8250_mtk: Fix high-speed baud rates clamping serial: 8250: fix null-ptr-deref in serial8250_start_tx() staging: comedi: addi_apci_1564: check INSN_CONFIG_DIGITAL_TRIG shift staging: comedi: addi_apci_1500: check INSN_CONFIG_DIGITAL_TRIG shift staging: comedi: ni_6527: fix INSN_CONFIG_DIGITAL_TRIG support staging: comedi: addi_apci_1032: check INSN_CONFIG_DIGITAL_TRIG shift staging: wlan-ng: properly check endpoint types Revert "cifs: Fix the target file was deleted when rename failed." usb: xhci: Fix ASM2142/ASM3142 DMA addressing usb: xhci-mtk: fix the failure of bandwidth allocation binder: Don't use mmput() from shrinker function. RISC-V: Upgrade smp_mb__after_spinlock() to iorw,iorw x86: math-emu: Fix up 'cmp' insn for clang ias arm64: Use test_tsk_thread_flag() for checking TIF_SINGLESTEP hwmon: (scmi) Fix potential buffer overflow in scmi_hwmon_probe() hwmon: (adm1275) Make sure we are reading enough data for different chips usb: gadget: udc: gr_udc: fix memleak on error handling path in gr_ep_init() Input: synaptics - enable InterTouch for ThinkPad X1E 1st gen dmaengine: ioat setting ioat timeout as module parameter hwmon: (aspeed-pwm-tacho) Avoid possible buffer overflow regmap: dev_get_regmap_match(): fix string comparison spi: mediatek: use correct SPI_CFG2_REG MACRO Input: add `SW_MACHINE_COVER` dmaengine: tegra210-adma: Fix runtime PM imbalance on error HID: apple: Disable Fn-key key-re-mapping on clone keyboards HID: steam: fixes race in handling device list. 
HID: alps: support devices with report id 2 HID: i2c-hid: add Mediacom FlexBook edge13 to descriptor override scripts/gdb: fix lx-symbols 'gdb.error' while loading modules scripts/decode_stacktrace: strip basepath from all paths serial: exar: Fix GPIO configuration for Sealevel cards based on XR17V35X bonding: check return value of register_netdevice() in bond_newlink() i2c: rcar: always clear ICSAR to avoid side effects net: ethernet: ave: Fix error returns in ave_init ipvs: fix the connection sync failed in some cases qed: suppress "don't support RoCE & iWARP" flooding on HW init mlxsw: destroy workqueue when trap_register in mlxsw_emad_init bonding: check error value of register_netdevice() immediately net: smc91x: Fix possible memory leak in smc_drv_probe() drm: sun4i: hdmi: Fix inverted HPD result ieee802154: fix one possible memleak in adf7242_probe net: dp83640: fix SIOCSHWTSTAMP to update the struct with actual configuration ax88172a: fix ax88172a_unbind() failures hippi: Fix a size used in a 'pci_free_consistent()' in an error handling path fpga: dfl: fix bug in port reset handshake bnxt_en: Fix race when modifying pause settings. 
btrfs: fix page leaks after failure to lock page for delalloc btrfs: fix mount failure caused by race with umount btrfs: fix double free on ulist after backref resolution failure ASoC: rt5670: Correct RT5670_LDO_SEL_MASK ALSA: info: Drop WARN_ON() from buffer NULL sanity check uprobes: Change handle_swbp() to send SIGTRAP with si_code=SI_KERNEL, to fix GDB regression IB/umem: fix reference count leak in ib_umem_odp_get() tipc: clean up skb list lock handling on send path spi: spi-fsl-dspi: Exit the ISR with IRQ_NONE when it's not ours SUNRPC reverting d03727b248d0 ("NFSv4 fix CLOSE not waiting for direct IO compeletion") irqdomain/treewide: Keep firmware node unconditionally allocated fuse: fix weird page warning drivers/firmware/psci: Fix memory leakage in alloc_init_cpu_groups() drm/nouveau/i2c/g94-: increase NV_PMGR_DP_AUXCTL_TRANSACTREQ timeout net: sky2: initialize return of gm_phy_read drivers/net/wan/lapbether: Fixed the value of hard_header_len xtensa: update *pos in cpuinfo_op.next xtensa: fix __sync_fetch_and_{and,or}_4 declarations scsi: scsi_transport_spi: Fix function pointer check mac80211: allow rx of mesh eapol frames with default rx key pinctrl: amd: fix npins for uart0 in kerncz_groups gpio: arizona: put pm_runtime in case of failure gpio: arizona: handle pm_runtime_get_sync failure case soc: qcom: rpmh: Dirt can only make you dirtier, not cleaner ANDROID: build: update ABI definitions ANDROID: update the kernel release format for GKI ANDROID: Incremental fs: magic number compatible 32-bit ANDROID: kbuild: don't merge .*..compoundliteral in modules ANDROID: GKI: preserve ABI for struct sock_cgroup_data Revert "genetlink: remove genl_bind" Revert "arm64/alternatives: use subsections for replacement sequences" Linux 4.19.134 spi: sprd: switch the sequence of setting WDG_LOAD_LOW and _HIGH rxrpc: Fix trace string libceph: don't omit recovery_deletes in target_copy() printk: queue wake_up_klogd irq_work only if per-CPU areas are ready genirq/affinity: 
Handle affinity setting on inactive interrupts correctly sched/fair: handle case of task_h_load() returning 0 sched: Fix unreliable rseq cpu_id for new tasks arm64: compat: Ensure upper 32 bits of x0 are zero on syscall return arm64: ptrace: Consistently use pseudo-singlestep exceptions arm64: ptrace: Override SPSR.SS when single-stepping is enabled thermal/drivers/cpufreq_cooling: Fix wrong frequency converted from power misc: atmel-ssc: lock with mutex instead of spinlock dmaengine: fsl-edma: Fix NULL pointer exception in fsl_edma_tx_handler intel_th: Fix a NULL dereference when hub driver is not loaded intel_th: pci: Add Emmitsburg PCH support intel_th: pci: Add Tiger Lake PCH-H support intel_th: pci: Add Jasper Lake CPU support powerpc/book3s64/pkeys: Fix pkey_access_permitted() for execute disable pkey hwmon: (emc2103) fix unable to change fan pwm1_enable attribute riscv: use 16KB kernel stack on 64-bit MIPS: Fix build for LTS kernel caused by backporting lpj adjustment timer: Fix wheel index calculation on last level timer: Prevent base->clk from moving backward uio_pdrv_genirq: fix use without device tree and no interrupt Input: i8042 - add Lenovo XiaoXin Air 12 to i8042 nomux list mei: bus: don't clean driver pointer Revert "zram: convert remaining CLASS_ATTR() to CLASS_ATTR_RO()" fuse: Fix parameter for FS_IOC_{GET,SET}FLAGS ovl: fix unneeded call to ovl_change_flags() ovl: relax WARN_ON() when decoding lower directory file handle ovl: inode reference leak in ovl_is_inuse true case. 
serial: mxs-auart: add missed iounmap() in probe failure and remove virtio: virtio_console: add missing MODULE_DEVICE_TABLE() for rproc serial virt: vbox: Fix guest capabilities mask check virt: vbox: Fix VBGL_IOCTL_VMMDEV_REQUEST_BIG and _LOG req numbers to match upstream USB: serial: option: add Quectel EG95 LTE modem USB: serial: option: add GosunCn GM500 series USB: serial: ch341: add new Product ID for CH340 USB: serial: cypress_m8: enable Simply Automated UPB PIM USB: serial: iuu_phoenix: fix memory corruption usb: gadget: function: fix missing spinlock in f_uac1_legacy usb: chipidea: core: add wakeup support for extcon usb: dwc2: Fix shutdown callback in platform USB: c67x00: fix use after free in c67x00_giveback_urb ALSA: hda/realtek - Enable Speaker for ASUS UX533 and UX534 ALSA: hda/realtek - change to suitable link model for ASUS platform ALSA: usb-audio: Fix race against the error recovery URB submission ALSA: line6: Sync the pending work cancel at disconnection ALSA: line6: Perform sanity check for each URB creation HID: quirks: Ignore Simply Automated UPB PIM HID: quirks: Always poll Obins Anne Pro 2 keyboard HID: magicmouse: do not set up autorepeat slimbus: core: Fix mismatch in of_node_get/put mtd: rawnand: oxnas: Release all devices in the _remove() path mtd: rawnand: oxnas: Unregister all devices on error mtd: rawnand: oxnas: Keep track of registered devices mtd: rawnand: brcmnand: fix CS0 layout mtd: rawnand: timings: Fix default tR_max and tCCS_min timings mtd: rawnand: marvell: Fix probe error path mtd: rawnand: marvell: Use nand_cleanup() when the device is not yet registered soc: qcom: rpmh-rsc: Allow using free WAKE TCS for active request soc: qcom: rpmh-rsc: Clear active mode configuration for wake TCS soc: qcom: rpmh: Invalidate SLEEP and WAKE TCSes before flushing new data soc: qcom: rpmh: Update dirty flag only when data changes perf stat: Zero all the 'ena' and 'run' array slot stats for interval mode apparmor: ensure that dfa state 
tables have entries copy_xstate_to_kernel: Fix typo which caused GDB regression regmap: debugfs: Don't sleep while atomic for fast_io regmaps ARM: dts: socfpga: Align L2 cache-controller nodename with dtschema Revert "thermal: mediatek: fix register index error" staging: comedi: verify array index is correct before using it usb: gadget: udc: atmel: fix uninitialized read in debug printk spi: spi-sun6i: sun6i_spi_transfer_one(): fix setting of clock rate arm64: dts: meson: add missing gxl rng clock phy: sun4i-usb: fix dereference of pointer phy0 before it is null checked iio:health:afe4404 Fix timestamp alignment and prevent data leak. ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Flight S ACPI: video: Use native backlight on Acer TravelMate 5735Z Input: mms114 - add extra compatible for mms345l ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Alpha S ACPI: video: Use native backlight on Acer Aspire 5783z ALSA: usb-audio: Rewrite registration quirk handling mmc: sdhci: do not enable card detect interrupt for gpio cd type doc: dt: bindings: usb: dwc3: Update entries for disabling SS instances in park mode ALSA: usb-audio: Create a registration quirk for Kingston HyperX Amp (0951:16d8) scsi: sr: remove references to BLK_DEV_SR_VENDOR, leave it enabled ARM: at91: pm: add quirk for sam9x60's ulp1 HID: quirks: Remove ITE 8595 entry from hid_have_special_driver net: sfp: add some quirks for GPON modules net: sfp: add support for module quirks Revert "usb/ehci-platform: Set PM runtime as active on resume" Revert "usb/xhci-plat: Set PM runtime as active on resume" Revert "usb/ohci-platform: Fix a warning when hibernating" of: of_mdio: Correct loop scanning logic net: dsa: bcm_sf2: Fix node reference count spi: spi-fsl-dspi: Fix lockup if device is shutdown during SPI transfer spi: fix initial SPI_SR value in spi-fsl-dspi iio:health:afe4403 Fix timestamp alignment and prevent data leak. 
iio:pressure:ms5611 Fix buffer element alignment iio:humidity:hts221 Fix alignment and data leak issues iio: pressure: zpa2326: handle pm_runtime_get_sync failure iio: mma8452: Add missed iio_device_unregister() call in mma8452_probe() iio: magnetometer: ak8974: Fix runtime PM imbalance on error iio:humidity:hdc100x Fix alignment and data leak issues iio:magnetometer:ak8974: Fix alignment and data leak issues arm64/alternatives: don't patch up internal branches i2c: eg20t: Load module automatically if ID matches gfs2: read-only mounts should grab the sd_freeze_gl glock tpm_tis: extra chip->ops check on error path in tpm_tis_core_init arm64/alternatives: use subsections for replacement sequences m68k: mm: fix node memblock init m68k: nommu: register start of the memory with memblock drm/exynos: fix ref count leak in mic_pre_enable drm/msm: fix potential memleak in error branch vlan: consolidate VLAN parsing code and limit max parsing depth sched: consistently handle layer3 header accesses in the presence of VLANs cgroup: Fix sock_cgroup_data on big-endian. 
cgroup: fix cgroup_sk_alloc() for sk_clone_lock() tcp: md5: allow changing MD5 keys in all socket states tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers tcp: md5: do not send silly options in SYNCOOKIES tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key() tcp: make sure listeners don't initialize congestion-control state tcp: fix SO_RCVLOWAT possible hangs under high mem pressure net: usb: qmi_wwan: add support for Quectel EG95 LTE modem net_sched: fix a memory leak in atm_tc_init() net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb llc: make sure applications use ARPHRD_ETHER l2tp: remove skb_dst_set() from l2tp_xmit_skb() ipv4: fill fl4_icmp_{type,code} in ping_v4_sendmsg genetlink: remove genl_bind net: rmnet: fix lower interface leak perf: Make perf able to build with latest libbfd UPSTREAM: media: v4l2-ctrl: Add H264 profile and levels UPSTREAM: media: v4l2-ctrl: Add control for h.264 chroma qp offset ANDROID: GKI: ASoC: compress: revert some code to avoid race condition ANDROID: GKI: Update the ABI xml representation. ANDROID: GKI: kernel: tick-sched: Add an API for wakeup callbacks ANDROID: ASoC: Compress: Check and set pcm_new driver op Revert "ANDROID: GKI: arm64: gki_defconfig: Disable CONFIG_ARM64_TAGGED_ADDR_ABI" ANDROID: arm64: configs: enabe CONFIG_TMPFS Revert "ALSA: compress: fix partial_drain completion state" ANDROID: GKI: enable CONFIG_EXT4_FS_POSIX_ACL. 
ANDROID: GKI: set CONFIG_STATIC_USERMODEHELPER_PATH Linux 4.19.133 s390/mm: fix huge pte soft dirty copying ARC: elf: use right ELF_ARCH ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE dm: use noio when sending kobject event drm/radeon: fix double free btrfs: fix fatal extent_buffer readahead vs releasepage race Revert "ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb" bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok() kprobes: Do not expose probe addresses to non-CAP_SYSLOG module: Do not expose section addresses to non-CAP_SYSLOG module: Refactor section attr into bin attribute kernel: module: Use struct_size() helper kallsyms: Refactor kallsyms_show_value() to take cred KVM: x86: Mark CR4.TSD as being possibly owned by the guest KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode KVM: x86: bit 8 of non-leaf PDPEs is not reserved KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART KVM: arm64: Fix definition of PAGE_HYP_DEVICE ALSA: usb-audio: add quirk for MacroSilicon MS2109 ALSA: hda - let hs_mic be picked ahead of hp_mic ALSA: opl3: fix infoleak in opl3 mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON() net: macb: mark device wake capable when "magic-packet" property present bnxt_en: fix NULL dereference in case SR-IOV configuration fails cxgb4: fix all-mask IP address comparison nbd: Fix memory leak in nbd_add_socket arm64: kgdb: Fix single-step exception handling oops ALSA: compress: fix partial_drain completion state net: hns3: fix use-after-free when doing self test smsc95xx: avoid memory leak in smsc95xx_bind smsc95xx: check return value of smsc95xx_reset net: cxgb4: fix return error value in t4_prep_fw drm/mediatek: Check plane visibility in atomic_update net: qrtr: Fix an out of bounds read qrtr_endpoint_post() x86/entry: Increase entry_stack size to a full page nvme-rdma: assign completion vector correctly block: release bip in a right way in error path usb: dwc3: pci: Fix reference 
count leak in dwc3_pci_resume_work scsi: mptscsih: Fix read sense data size ARM: imx6: add missing put_device() call in imx6q_suspend_init() cifs: update ctime and mtime during truncate s390/kasan: fix early pgm check handler execution drm: panel-orientation-quirks: Use generic orientation-data for Acer S1003 drm: panel-orientation-quirks: Add quirk for Asus T101HA panel i40e: protect ring accesses with READ- and WRITE_ONCE ixgbe: protect ring accesses with READ- and WRITE_ONCE spi: spidev: fix a potential use-after-free in spidev_release() spi: spidev: fix a race between spidev_release and spidev_remove gpu: host1x: Detach driver on unregister drm/tegra: hub: Do not enable orphaned window group ARM: dts: omap4-droid4: Fix spi configuration and increase rate regmap: fix alignment issue spi: spi-fsl-dspi: Fix external abort on interrupt in resume or exit paths spi: spi-fsl-dspi: use IRQF_SHARED mode to request IRQ spi: spi-fsl-dspi: Fix lockup if device is removed during SPI transfer spi: spi-fsl-dspi: Adding shutdown hook KVM: s390: reduce number of IO pins to 1 ANDROID: GKI: update abi based on padding fields being added ANDROID: GKI: USB: Gadget: add Android ABI padding to struct usb_gadget ANDROID: GKI: sound/usb/card.h: add Android ABI padding to struct snd_usb_endpoint ANDROID: fscrypt: fix DUN contiguity with inline encryption + IV_INO_LBLK_32 policies ANDROID: f2fs: add back compress inode check Linux 4.19.132 efi: Make it possible to disable efivar_ssdt entirely dm zoned: assign max_io_len correctly irqchip/gic: Atomically update affinity MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen cifs: Fix the target file was deleted when rename failed. 
SMB3: Honor lease disabling for multiuser mounts SMB3: Honor persistent/resilient handle flags for multiuser mounts SMB3: Honor 'seal' flag for multiuser mounts Revert "ALSA: usb-audio: Improve frames size computation" nfsd: apply umask on fs without ACL support i2c: mlxcpld: check correct size of maximum RECV_LEN packet i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665 nvme: fix a crash in nvme_mpath_add_disk SMB3: Honor 'posix' flag for multiuser mounts virtio-blk: free vblk-vqs in error path of virtblk_probe() drm: sun4i: hdmi: Remove extra HPD polling hwmon: (acpi_power_meter) Fix potential memory leak in acpi_power_meter_add() hwmon: (max6697) Make sure the OVERT mask is set correctly cxgb4: fix SGE queue dump destination buffer context cxgb4: use correct type for all-mask IP address comparison cxgb4: parse TC-U32 key values and masks natively cxgb4: use unaligned conversion for fetching timestamp drm/msm/dpu: fix error return code in dpu_encoder_init crypto: af_alg - fix use-after-free in af_alg_accept() due to bh_lock_sock() kgdb: Avoid suspicious RCU usage warning nvme-multipath: fix deadlock between ana_work and scan_work nvme-multipath: set bdi capabilities once s390/debug: avoid kernel warning on too large number of pages usb: usbtest: fix missing kfree(dev->buf) in usbtest_disconnect mm/slub: fix stack overruns with SLUB_STATS mm/slub.c: fix corrupted freechain in deactivate_slab() usbnet: smsc95xx: Fix use-after-free after removal EDAC/amd64: Read back the scrub rate PCI register on F15h mm: fix swap cache node allocation mask btrfs: fix a block group ref counter leak after failure to remove block group ANDROID: Update ABI representation for libabigail update ANDROID: Update the ABI representation ANDROID: Update the ABI xml representation ANDROID: GKI: fix ABI diffs caused by GPU heap and pool vmstat additions ANDROID: sched: consider stune boost margin when computing energy ANDROID: GKI: move abi files to android/ ANDROID: GKI: drop 
unneeded "_whitelist" off of symbol filenames UPSTREAM: binder: fix null deref of proc->context ANDROID: cpufreq: schedutil: maintain raw cache when next_f is not changed UPSTREAM: net: bpf: Make bpf_ktime_get_ns() available to non GPL programs UPSTREAM: usb: musb: mediatek: add reset FADDR to zero in reset interrupt handle ANDROID: GKI: scripts: Makefile: update the lz4 command (#2) ANDROID: Update the ABI xml representation Revert "drm/dsi: Fix byte order of DCS set/get brightness" Linux 4.19.131 Revert "tty: hvc: Fix data abort due to race in hvc_open" xfs: add agf freeblocks verify in xfs_agf_verify dm writecache: add cond_resched to loop in persistent_memory_claim() dm writecache: correct uncommitted_block when discarding uncommitted entry NFSv4 fix CLOSE not waiting for direct IO compeletion pNFS/flexfiles: Fix list corruption if the mirror count changes SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment() sunrpc: fixed rollback in rpc_gssd_dummy_populate() Staging: rtl8723bs: prevent buffer overflow in update_sta_support_rate() drm/radeon: fix fb_div check in ni_init_smc_spll_table() drm: rcar-du: Fix build error ring-buffer: Zero out time extend if it is nested and not absolute tracing: Fix event trigger to accept redundant spaces arm64: perf: Report the PC value in REGS_ABI_32 mode ocfs2: fix panic on nfs server over ocfs2 ocfs2: fix value of OCFS2_INVALID_SLOT ocfs2: load global_inode_alloc ocfs2: avoid inode removal while nfsd is accessing it mm/slab: use memzero_explicit() in kzfree() btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof btrfs: fix data block group relocation failure due to concurrent scrub x86/asm/64: Align start of __clear_user() loop to 16-bytes KVM: nVMX: Plumb L2 GPA through to PML emulation KVM: X86: Fix MSR range of APIC registers in X2APIC mode erofs: fix partially uninitialized misuse in z_erofs_onlinepage_fixup ACPI: sysfs: Fix pm_profile_attr type ALSA: hda/realtek - Add quirk for MSI GE63 
laptop ALSA: hda: Add NVIDIA codec IDs 9a & 9d through a0 to patch table RISC-V: Don't allow write+exec only page mapping request in mmap blktrace: break out of blktrace setup on concurrent calls kbuild: improve cc-option to clean up all temporary files arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n s390/vdso: fix vDSO clock_getres() s390/ptrace: fix setting syscall number net: alx: fix race condition in alx_remove ibmvnic: Harden device login requests hwrng: ks-sa - Fix runtime PM imbalance on error riscv/atomic: Fix sign extension for RV64I drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp() ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function sata_rcar: handle pm_runtime_get_sync failure cases sched/core: Fix PI boosting between RT and DEADLINE tasks sched/deadline: Initialize ->dl_boosted i2c: core: check returned size of emulated smbus block read i2c: fsi: Fix the port number field in status register net: bcmgenet: use hardware padding of runt frames netfilter: ipset: fix unaligned atomic access usb: gadget: udc: Potential Oops in error handling code ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram() cxgb4: move handling L2T ARP failures to caller net: qed: fix excessive QM ILT lines consumption net: qed: fix NVMe login fails over VFs net: qed: fix left elements count calculation RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads() ASoC: rockchip: Fix a reference count leak. 
RDMA/cma: Protect bind_list and listen_list while finding matching cm id RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532 rxrpc: Fix handling of rwind from an ACK packet ARM: dts: NSP: Correct FA2 mailbox node regmap: Fix memory leak from regmap_register_patch x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get() ARM: dts: Fix duovero smsc interrupt for suspend ASoC: fsl_ssi: Fix bclk calculation for mono channel regualtor: pfuze100: correct sw1a/sw2 on pfuze3000 efi/esrt: Fix reference count leak in esre_create_sysfs_entry. ASoC: q6asm: handle EOS correctly xfrm: Fix double ESP trailer insertion in IPsec crypto offload. cifs/smb3: Fix data inconsistent when zero file range cifs/smb3: Fix data inconsistent when punch hole IB/mad: Fix use after free when destroying MAD agent loop: replace kill_bdev with invalidate_bdev cdc-acm: Add DISABLE_ECHO quirk for Microchip/SMSC chip xhci: Return if xHCI doesn't support LPM xhci: Fix enumeration issue when setting max packet size for FS devices. 
xhci: Fix incorrect EP_STATE_MASK scsi: zfcp: Fix panic on ERP timeout for previously dismissed ERP action ALSA: usb-audio: Fix OOB access of mixer element list ALSA: usb-audio: add quirk for Samsung USBC Headset (AKG) ALSA: usb-audio: add quirk for Denon DCD-1500RE usb: typec: tcpci_rt1711h: avoid screaming irq causing boot hangs usb: host: ehci-exynos: Fix error check in exynos_ehci_probe() xhci: Poll for U0 after disabling USB2 LPM usb: host: xhci-mtk: avoid runtime suspend when removing hcd USB: ehci: reopen solution for Synopsys HC bug usb: add USB_QUIRK_DELAY_INIT for Logitech C922 usb: dwc2: Postponed gadget registration to the udc class driver USB: ohci-sm501: Add missed iounmap() in remove net: core: reduce recursion limit value net: Do not clear the sock TX queue in sk_set_socket() net: Fix the arp error in some cases sch_cake: don't call diffserv parsing code when it is not needed tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT sch_cake: fix a few style nits sch_cake: don't try to reallocate or unshare skb unconditionally ip_tunnel: fix use-after-free in ip_tunnel_lookup() net: phy: Check harder for errors in get_phy_id() ip6_gre: fix use-after-free in ip6gre_tunnel_lookup() tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes tcp: grow window for OOO packets only for SACK flows tcp: don't ignore ECN CWR on pure ACK sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket rxrpc: Fix notification call on completion of discarded calls rocker: fix incorrect error handling in dma_rings_init net: usb: ax88179_178a: fix packet alignment padding net: increment xmit_recursion level in dev_direct_xmit() net: use correct this_cpu primitive in dev_recursion_level net: place xmit recursion in softnet data net: fix memleak in register_netdevice() net: bridge: enfore alignment for ethernet address mld: fix memory leak in ipv6_mc_destroy_dev() ibmveth: Fix max MTU limit apparmor: don't try to replace stale label in 
ptraceme check ALSA: hda/realtek - Enable micmute LED on and HP system ALSA: hda/realtek: Enable mute LED on an HP system ALSA: hda/realtek - Enable the headset of ASUS B9450FA with ALC294 fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()" i2c: tegra: Fix Maximum transfer size i2c: tegra: Add missing kerneldoc for some fields i2c: tegra: Cleanup kerneldoc comments EDAC/amd64: Add Family 17h Model 30h PCI IDs net: sched: export __netdev_watchdog_up() net: bcmgenet: remove HFB_CTRL access mtd: rawnand: marvell: Fix the condition on a return code fanotify: fix ignore mask logic for events on child and on dir block/bio-integrity: don't free 'buf' if bio_integrity_add_page() failed net: be more gentle about silly gso requests coming from user ANDROID: lib/vdso: do not update timespec if clock_getres() fails Revert "ANDROID: fscrypt: add key removal notifier chain" ANDROID: update the ABI xml and qcom whitelist ANDROID: fs: export vfs_{read|write} ANDROID: GKI: update abi definitions now that sdcardfs is gone Revert "ANDROID: sdcardfs: Enable modular sdcardfs" Revert "ANDROID: vfs: Add setattr2 for filesystems with per mount permissions" Revert "ANDROID: vfs: fix export symbol type" Revert "ANDROID: vfs: Add permission2 for filesystems with per mount permissions" Revert "ANDROID: vfs: fix export symbol types" Revert "ANDROID: vfs: add d_canonical_path for stacked filesystem support" Revert "ANDROID: fs: Restore vfs_path_lookup() export" ANDROID: sdcardfs: remove sdcardfs from system Revert "ALSA: usb-audio: Improve frames size computation" ANDROID: Makefile: append BUILD_NUMBER to version string when defined ANDROID: GKI: Update ABI for incremental fs ANDROID: GKI: Update cuttlefish whitelist ANDROID: GKI: Disable INCREMENTAL_FS on x86 too ANDROID: cpufreq: schedutil: drop cache when update skipped due to rate limit Linux 4.19.130 KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be generated kvm: x86: Fix reserved bits related 
calculation errors caused by MKTME kvm: x86: Move kvm_set_mmio_spte_mask() from x86.c to mmu.c md: add feature flag MD_FEATURE_RAID0_LAYOUT Revert "dpaa_eth: fix usage as DSA master, try 3" net: core: device_rename: Use rwsem instead of a seqcount sched/rt, net: Use CONFIG_PREEMPTION.patch kretprobe: Prevent triggering kretprobe from within kprobe_flush_task net: octeon: mgmt: Repair filling of RX ring e1000e: Do not wake up the system via WOL if device wakeup is disabled kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex crypto: algboss - don't wait during notifier callback crypto: algif_skcipher - Cap recv SG list at ctx->used drm/i915/icl+: Fix hotplug interrupt disabling after storm detection drm/i915: Whitelist context-local timestamp in the gen9 cmdparser s390: fix syscall_get_error for compat processes mtd: rawnand: tmio: Fix the probe error path mtd: rawnand: mtk: Fix the probe error path mtd: rawnand: plat_nand: Fix the probe error path mtd: rawnand: socrates: Fix the probe error path mtd: rawnand: oxnas: Fix the probe error path mtd: rawnand: oxnas: Add of_node_put() mtd: rawnand: orion: Fix the probe error path mtd: rawnand: xway: Fix the probe error path mtd: rawnand: sharpsl: Fix the probe error path mtd: rawnand: diskonchip: Fix the probe error path mtd: rawnand: Pass a nand_chip object to nand_release() mtd: rawnand: Pass a nand_chip object to nand_scan() block: nr_sects_write(): Disable preemption on seqcount write x86/boot/compressed: Relax sed symbol type regex for LLVM ld.lld drm/dp_mst: Increase ACT retry timeout to 3s ext4: avoid race conditions when remounting with options that change dax ext4: fix partial cluster initialization when splitting extent selinux: fix double free drm/amdgpu: Replace invalid device ID with a valid device ID drm/qxl: Use correct notify port address when creating cursor ring drm/dp_mst: Reformat drm_dp_check_act_status() a bit drm: encoder_slave: fix refcouting error for modules libata: Use per port sync 
for detach arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints block: Fix use-after-free in blkdev_get() afs: afs_write_end() should change i_size under the right lock afs: Fix non-setting of mtime when writing into mmap bcache: fix potential deadlock problem in btree_gc_coalesce ext4: stop overwrite the errcode in ext4_setup_super perf report: Fix NULL pointer dereference in hists__fprintf_nr_sample_events() usb/ehci-platform: Set PM runtime as active on resume usb: host: ehci-platform: add a quirk to avoid stuck usb/xhci-plat: Set PM runtime as active on resume xdp: Fix xsk_generic_xmit errno net/filter: Permit reading NET in load_bytes_relative when MAC not set x86/idt: Keep spurious entries unset in system_vectors scsi: acornscsi: Fix an error handling path in acornscsi_probe() drm/sun4i: hdmi ddc clk: Fix size of m divider ASoC: rt5645: Add platform-data for Asus T101HA ASoC: Intel: bytcr_rt5640: Add quirk for Toshiba Encore WT10-A tablet ASoC: core: only convert non DPCM link to DPCM link afs: Fix memory leak in afs_put_sysnames() selftests/net: in timestamping, strncpy needs to preserve null byte drivers/perf: hisi: Fix wrong value for all counters enable NTB: ntb_test: Fix bug when counting remote files NTB: perf: Fix race condition when run with ntb_test NTB: perf: Fix support for hardware that doesn't have port numbers NTB: perf: Don't require one more memory window than number of peers NTB: Revert the change to use the NTB device dev for DMA allocations NTB: ntb_tool: reading the link file should not end in a NULL byte ntb_tool: pass correct struct device to dma_alloc_coherent ntb_perf: pass correct struct device to dma_alloc_coherent gfs2: fix use-after-free on transaction ail lists blktrace: fix endianness for blk_log_remap() blktrace: fix endianness in get_pdu_int() blktrace: use errno instead of bi_status selftests/vm/pkeys: fix alloc_random_pkey() to make it really random elfnote: mark all .note sections SHF_ALLOC 
include/linux/bitops.h: avoid clang shift-count-overflow warnings lib/zlib: remove outdated and incorrect pre-increment optimization geneve: change from tx_error to tx_dropped on missing metadata crypto: omap-sham - add proper load balancing support for multicore pinctrl: freescale: imx: Fix an error handling path in 'imx_pinctrl_probe()' pinctrl: imxl: Fix an error handling path in 'imx1_pinctrl_core_probe()' scsi: ufs: Don't update urgent bkops level when toggling auto bkops scsi: iscsi: Fix reference count leak in iscsi_boot_create_kobj gfs2: Allow lock_nolock mount to specify jid=X openrisc: Fix issue with argument clobbering for clone/fork rxrpc: Adjust /proc/net/rxrpc/calls to display call->debug_id not user_ID vfio/mdev: Fix reference count leak in add_mdev_supported_type ASoC: fsl_asrc_dma: Fix dma_chan leak when config DMA channel failed extcon: adc-jack: Fix an error handling path in 'adc_jack_probe()' powerpc/4xx: Don't unmap NULL mbase of: Fix a refcounting bug in __of_attach_node_sysfs() NFSv4.1 fix rpc_call_done assignment for BIND_CONN_TO_SESSION net: sunrpc: Fix off-by-one issues in 'rpc_ntop6' clk: sprd: return correct type of value for _sprd_pll_recalc_rate KVM: PPC: Book3S HV: Ignore kmemleak false positives scsi: ufs-qcom: Fix scheduling while atomic issue clk: bcm2835: Fix return type of bcm2835_register_gate scsi: target: tcmu: Fix a use after free in tcmu_check_expired_queue_cmd() ASoC: fix incomplete error-handling in img_i2s_in_probe. x86/apic: Make TSC deadline timer detection message visible RDMA/iw_cxgb4: cleanup device debugfs entries on ULD remove usb: gadget: Fix issue with config_ep_by_speed function usb: gadget: fix potential double-free in m66592_probe. 
usb: gadget: lpc32xx_udc: don't dereference ep pointer before null check USB: gadget: udc: s3c2410_udc: Remove pointless NULL check in s3c2410_udc_nuke usb: dwc2: gadget: move gadget resume after the core is in L0 state watchdog: da9062: No need to ping manually before setting timeout IB/cma: Fix ports memory leak in cma_configfs PCI: dwc: Fix inner MSI IRQ domain registration PCI/PTM: Inherit Switch Downstream Port PTM settings from Upstream Port dm zoned: return NULL if dmz_get_zone_for_reclaim() fails to find a zone powerpc/64s/pgtable: fix an undefined behaviour arm64: tegra: Fix ethernet phy-mode for Jetson Xavier scsi: target: tcmu: Userspace must not complete queued commands clk: samsung: exynos5433: Add IGNORE_UNUSED flag to sclk_i2s1 fpga: dfl: afu: Corrected error handling levels tty: n_gsm: Fix bogus i++ in gsm_data_kick USB: host: ehci-mxc: Add error handling in ehci_mxc_drv_probe() ASoC: Intel: bytcr_rt5640: Add quirk for Toshiba Encore WT8-A tablet drm/msm/mdp5: Fix mdp5_init error path for failed mdp5_kms allocation usb/ohci-platform: Fix a warning when hibernating vfio-pci: Mask cap zero powerpc/ps3: Fix kexec shutdown hang powerpc/pseries/ras: Fix FWNMI_VALID off by one ipmi: use vzalloc instead of kmalloc for user creation HID: Add quirks for Trust Panora Graphic Tablet tty: n_gsm: Fix waking up upper tty layer when room available tty: n_gsm: Fix SOF skipping powerpc/64: Don't initialise init_task->thread.regs PCI: Fix pci_register_host_bridge() device_register() error handling clk: ti: composite: fix memory leak dlm: remove BUG() before panic() pinctrl: rockchip: fix memleak in rockchip_dt_node_to_map scsi: mpt3sas: Fix double free warnings power: supply: smb347-charger: IRQSTAT_D is volatile power: supply: lp8788: Fix an error handling path in 'lp8788_charger_probe()' scsi: qla2xxx: Fix warning after FC target reset PCI/ASPM: Allow ASPM on links to PCIe-to-PCI/PCI-X Bridges PCI: rcar: Fix incorrect programming of OB windows drivers: base: Fix 
NULL pointer exception in __platform_driver_probe() if a driver developer is foolish serial: amba-pl011: Make sure we initialize the port.lock spinlock i2c: pxa: fix i2c_pxa_scream_blue_murder() debug output PCI: v3-semi: Fix a memory leak in v3_pci_probe() error handling paths staging: sm750fb: add missing case while setting FB_VISUAL usb: dwc3: gadget: Properly handle failed kick_transfer thermal/drivers/ti-soc-thermal: Avoid dereferencing ERR_PTR slimbus: ngd: get drvdata from correct device tty: hvc: Fix data abort due to race in hvc_open s390/qdio: put thinint indicator after early error ALSA: usb-audio: Fix racy list management in output queue ALSA: usb-audio: Improve frames size computation staging: gasket: Fix mapping refcnt leak when register/store fails staging: gasket: Fix mapping refcnt leak when put attribute fails firmware: qcom_scm: fix bogous abuse of dma-direct internals pinctrl: rza1: Fix wrong array assignment of rza1l_swio_entries scsi: qedf: Fix crash when MFW calls for protocol stats while function is still probing gpio: dwapb: Append MODULE_ALIAS for platform driver ARM: dts: sun8i-h2-plus-bananapi-m2-zero: Fix led polarity scsi: qedi: Do not flush offload work if ARP not resolved arm64: dts: mt8173: fix unit name warnings staging: greybus: fix a missing-check bug in gb_lights_light_config() x86/purgatory: Disable various profiling and sanitizing options apparmor: fix nnp subset test for unconfined scsi: ibmvscsi: Don't send host info in adapter info MAD after LPM scsi: sr: Fix sr_probe() missing deallocate of device minor ASoC: meson: add missing free_irq() in error path apparmor: check/put label on apparmor_sk_clone_security() apparmor: fix introspection of of task mode for unconfined tasks mksysmap: Fix the mismatch of '.L' symbols in System.map NTB: Fix the default port and peer numbers for legacy drivers NTB: ntb_pingpong: Choose doorbells based on port number yam: fix possible memory leak in yam_init_driver pwm: img: Call 
pm_runtime_put() in pm_runtime_get_sync() failed case powerpc/crashkernel: Take "mem=" option into account PCI: vmd: Filter resource type bits from shadow register nfsd: Fix svc_xprt refcnt leak when setup callback client failed powerpc/perf/hv-24x7: Fix inconsistent output values incase multiple hv-24x7 events run clk: clk-flexgen: fix clock-critical handling scsi: lpfc: Fix lpfc_nodelist leak when processing unsolicited event mfd: wm8994: Fix driver operation if loaded as modules gpio: dwapb: Call acpi_gpiochip_free_interrupts() on GPIO chip de-registration m68k/PCI: Fix a memory leak in an error handling path RDMA/mlx5: Add init2init as a modify command vfio/pci: fix memory leaks in alloc_perm_bits() ps3disk: use the default segment boundary PCI: aardvark: Don't blindly enable ASPM L0s and don't write to read-only register dm mpath: switch paths in dm_blk_ioctl() code path serial: 8250: Fix max baud limit in generic 8250 port usblp: poison URBs upon disconnect clk: samsung: Mark top ISP and CAM clocks on Exynos542x as critical i2c: pxa: clear all master action bits in i2c_pxa_stop_message() f2fs: report delalloc reserve as non-free in statfs for project quota iio: bmp280: fix compensation of humidity scsi: qla2xxx: Fix issue with adapter's stopping state PCI: Allow pci_resize_resource() for devices on root bus ALSA: isa/wavefront: prevent out of bounds write in ioctl ALSA: hda/realtek - Introduce polarity for micmute LED GPIO scsi: qedi: Check for buffer overflow in qedi_set_path() ARM: integrator: Add some Kconfig selections ASoC: davinci-mcasp: Fix dma_chan refcnt leak when getting dma type backlight: lp855x: Ensure regulators are disabled on probe failure clk: qcom: msm8916: Fix the address location of pll->config_reg remoteproc: Fix IDR initialisation in rproc_alloc() iio: pressure: bmp280: Tolerate IRQ before registering i2c: piix4: Detect secondary SMBus controller on AMD AM4 chipsets ASoC: tegra: tegra_wm8903: Support nvidia, headset property clk: sunxi: 
Fix incorrect usage of round_down() power: supply: bq24257_charger: Replace depends on REGMAP_I2C with select ANDROID: ext4: Optimize match for casefolded encrypted dirs ANDROID: ext4: Handle casefolding with encryption ANDROID: extcon: Remove redundant EXPORT_SYMBOL_GPL ANDROID: update the ABI xml representation ANDROID: GKI: cfg80211: add ABI changes for CONFIG_NL80211_TESTMODE ANDROID: gki_defconfig: x86: Enable KERNEL_LZ4 ANDROID: GKI: scripts: Makefile: update the lz4 command FROMLIST: f2fs: fix use-after-free when accessing bio->bi_crypt_context UPSTREAM: fdt: Update CRC check for rng-seed ANDROID: GKI: Update ABI for incremental fs ANDROID: GKI: Update whitelist and defconfig for incfs ANDROID: Use depmod from the hermetic toolchain Linux 4.19.129 perf symbols: Fix debuginfo search for Ubuntu perf probe: Check address correctness by map instead of _etext perf probe: Fix to check blacklist address correctly perf probe: Do not show the skipped events w1: omap-hdq: cleanup to add missing newline for some dev_dbg mtd: rawnand: pasemi: Fix the probe error path mtd: rawnand: brcmnand: fix hamming oob layout sunrpc: clean up properly in gss_mech_unregister() sunrpc: svcauth_gss_register_pseudoflavor must reject duplicate registrations. 
kbuild: force to build vmlinux if CONFIG_MODVERSION=y powerpc/64s: Save FSCR to init_task.thread.fscr after feature init powerpc/64s: Don't let DT CPU features set FSCR_DSCR drivers/macintosh: Fix memleak in windfarm_pm112 driver ARM: dts: s5pv210: Set keep-power-in-suspend for SDHCI1 on Aries ARM: dts: at91: sama5d2_ptc_ek: fix vbus pin ARM: dts: exynos: Fix GPIO polarity for thr GalaxyS3 CM36651 sensor's bus ARM: tegra: Correct PL310 Auxiliary Control Register initialization kernel/cpu_pm: Fix uninitted local in cpu_pm alpha: fix memory barriers so that they conform to the specification dm crypt: avoid truncating the logical block size sparc64: fix misuses of access_process_vm() in genregs32_[sg]et() sparc32: fix register window handling in genregs32_[gs]et() gnss: sirf: fix error return code in sirf_probe() pinctrl: samsung: Save/restore eint_mask over suspend for EINT_TYPE GPIOs pinctrl: samsung: Correct setting of eint wakeup mask on s5pv210 power: vexpress: add suppress_bind_attrs to true igb: Report speed and duplex as unknown when device is runtime suspended media: ov5640: fix use of destroyed mutex b43_legacy: Fix connection problem with WPA3 b43: Fix connection problem with WPA3 b43legacy: Fix case where channel status is corrupted Bluetooth: hci_bcm: fix freeing not-requested IRQ media: go7007: fix a miss of snd_card_free carl9170: remove P2P_GO support e1000e: Relax condition to trigger reset for ME workaround e1000e: Disable TSO for buffer overrun workaround PCI: Program MPS for RCiEP devices ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init() btrfs: fix wrong file range cleanup after an error filling dealloc range btrfs: fix error handling when submitting direct I/O bio PCI: Generalize multi-function power dependency device links PCI: Unify ACS quirk desired vs provided checking PCI: Make ACS quirk implementations more uniform serial: 8250_pci: Move Pericom IDs to pci_ids.h PCI: Add Loongson vendor ID x86/amd_nb: Add Family 19h PCI IDs PCI: 
vmd: Add device id for VMD device 8086:9A0B PCI: Add Amazon's Annapurna Labs vendor ID PCI: Add Genesys Logic, Inc. Vendor ID ALSA: lx6464es - add support for LX6464ESe pci express variant x86/amd_nb: Add PCI device IDs for family 17h, model 70h PCI: mediatek: Add controller support for MT7629 PCI: Enable NVIDIA HDA controllers PCI: Add NVIDIA GPU multi-function power dependencies PCI: Add Synopsys endpoint EDDA Device ID misc: pci_endpoint_test: Add support to test PCI EP in AM654x misc: pci_endpoint_test: Add the layerscape EP device support PCI: Move Rohm Vendor ID to generic list PCI: Move Synopsys HAPS platform device IDs PCI: add USR vendor id and use it in r8169 and w6692 driver x86/amd_nb: Add PCI device IDs for family 17h, model 30h hwmon/k10temp, x86/amd_nb: Consolidate shared device IDs pci:ipmi: Move IPMI PCI class id defines to pci_ids.h PCI: Remove unused NFP32xx IDs PCI: Add ACS quirk for Intel Root Complex Integrated Endpoints PCI: Add ACS quirk for iProc PAXB PCI: Avoid FLR for AMD Starship USB 3.0 PCI: Avoid FLR for AMD Matisse HD Audio & USB 3.0 PCI: Avoid Pericom USB controller OHCI/EHCI PME# defect ext4: fix race between ext4_sync_parent() and rename() ext4: fix error pointer dereference ext4: fix EXT_MAX_EXTENT/INDEX to check for zeroed eh_max evm: Fix possible memory leak in evm_calc_hmac_or_hash() ima: Directly assign the ima_default_policy pointer to ima_rules ima: Fix ima digest hash table key calculation mm: initialize deferred pages with interrupts enabled mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked() btrfs: send: emit file capabilities after chown btrfs: include non-missing as a qualifier for the latest_bdev string.h: fix incompatibility between FORTIFY_SOURCE and KASAN platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type platform/x86: intel-hid: Add a quirk to support HP Spectre X2 (2015) platform/x86: hp-wmi: Convert simple_strtoul() to kstrtou32() cpuidle: Fix three 
reference count leaks spi: dw: Return any value retrieved from the dma_transfer callback mmc: sdhci-esdhc-imx: fix the mask for tuning start point ixgbe: fix signed-integer-overflow warning mmc: via-sdmmc: Respect the cmd->busy_timeout from the mmc core staging: greybus: sdio: Respect the cmd->busy_timeout from the mmc core mmc: sdhci-msm: Set SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 quirk bcache: fix refcount underflow in bcache_device_free() MIPS: Fix IRQ tracing when call handle_fpe() and handle_msa_fpe() PCI: Don't disable decoding when mmio_always_on is set macvlan: Skip loopback packets in RX handler btrfs: qgroup: mark qgroup inconsistent if we're inherting snapshot to a new qgroup m68k: mac: Don't call via_flush_cache() on Mac IIfx x86/mm: Stop printing BRK addresses crypto: stm32/crc32 - fix multi-instance crypto: stm32/crc32 - fix run-time self test issue. crypto: stm32/crc32 - fix ext4 chksum BUG_ON() mips: Add udelay lpj numbers adjustment mips: MAAR: Use more precise address mask x86/boot: Correct relocation destination on old linkers mwifiex: Fix memory corruption in dump_station rtlwifi: Fix a double free in _rtl_usb_tx_urb_setup() net/mlx5e: IPoIB, Drop multicast packets that this interface sent veth: Adjust hard_start offset on redirect XDP frames md: don't flush workqueue unconditionally in md_open mt76: avoid rx reorder buffer overflow net: qed*: Reduce RX and TX default ring count when running inside kdump kernel wcn36xx: Fix error handling path in 'wcn36xx_probe()' ath10k: Remove msdu from idr when management pkt send fails nvme: refine the Qemu Identify CNS quirk platform/x86: intel-vbtn: Also handle tablet-mode switch on "Detachable" and "Portable" chassis-types platform/x86: intel-vbtn: Do not advertise switches to userspace if they are not there platform/x86: intel-vbtn: Split keymap into buttons and switches parts platform/x86: intel-vbtn: Use acpi_evaluate_integer() xfs: fix duplicate verification from xfs_qm_dqflush() xfs: reset buffer write 
failure state on successful completion kgdb: Fix spurious true from in_dbg_master() mips: cm: Fix an invalid error code of INTVN_*_ERR MIPS: Truncate link address into 32bit for 32bit kernel Crypto/chcr: fix for ccm(aes) failed test xfs: clean up the error handling in xfs_swap_extents powerpc/spufs: fix copy_to_user while atomic net: allwinner: Fix use correct return type for ndo_start_xmit() media: cec: silence shift wrapping warning in __cec_s_log_addrs() net: lpc-enet: fix error return code in lpc_mii_init() drivers/perf: hisi: Fix typo in events attribute array sched/core: Fix illegal RCU from offline CPUs exit: Move preemption fixup up, move blocking operations down lib/mpi: Fix 64-bit MIPS build with Clang net: bcmgenet: set Rx mode before starting netif selftests/bpf: Fix memory leak in extract_build_id() netfilter: nft_nat: return EOPNOTSUPP if type or flags are not supported audit: fix a net reference leak in audit_list_rules_send() Bluetooth: btbcm: Add 2 missing models to subver tables MIPS: Make sparse_init() using top-down allocation media: platform: fcp: Set appropriate DMA parameters media: dvb: return -EREMOTEIO on i2c transfer failure. 
audit: fix a net reference leak in audit_send_reply() dt-bindings: display: mediatek: control dpi pins mode to avoid leakage e1000: Distribute switch variables for initialization tools api fs: Make xxx__mountpoint() more scalable brcmfmac: fix wrong location to get firmware feature staging: android: ion: use vmap instead of vm_map_ram net: vmxnet3: fix possible buffer overflow caused by bad DMA value in vmxnet3_get_rss() x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit spi: dw: Fix Rx-only DMA transfers mmc: meson-mx-sdio: trigger a soft reset after a timeout or CRC error batman-adv: Revert "disable ethtool link speed detection when auto negotiation off" ARM: 8978/1: mm: make act_mm() respect THREAD_SIZE btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums clocksource: dw_apb_timer_of: Fix missing clockevent timers clocksource: dw_apb_timer: Make CPU-affiliation being optional spi: dw: Enable interrupts in accordance with DMA xfer mode kgdb: Prevent infinite recursive entries to the debugger kgdb: Disable WARN_CONSOLE_UNLOCKED for all kgdb Bluetooth: Add SCO fallback for invalid LMP parameters error MIPS: Loongson: Build ATI Radeon GPU driver as module ixgbe: Fix XDP redirect on archs with PAGE_SIZE above 4K arm64: insn: Fix two bugs in encoding 32-bit logical immediates spi: dw: Zero DMA Tx and Rx configurations on stack arm64: cacheflush: Fix KGDB trap detection efi/libstub/x86: Work around LLVM ELF quirk build regression net: ena: fix error returning in ena_com_get_hash_function() net: atlantic: make hw_get_regs optional spi: pxa2xx: Apply CS clk quirk to BXT objtool: Ignore empty alternatives media: si2157: Better check for running tuner in init crypto: ccp -- don't "select" CONFIG_DMADEVICES drm: bridge: adv7511: Extend list of audio sample rates ACPI: GED: use correct trigger type field in _Exx / _Lxx handling KVM: arm64: Synchronize sysreg state on injecting an AArch32 exception xen/pvcalls-back: test for errors when 
calling backend_connect() mmc: sdio: Fix potential NULL pointer error in mmc_sdio_init_card() ARM: dts: at91: sama5d2_ptc_ek: fix sdmmc0 node description mmc: sdhci-msm: Clear tuning done flag while hs400 tuning agp/intel: Reinforce the barrier after GTT updates perf: Add cond_resched() to task_function_call() fat: don't allow to mount if the FAT length == 0 mm/slub: fix a memory leak in sysfs_slab_add() drm/vkms: Hold gem object while still in-use Smack: slab-out-of-bounds in vsscanf ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb ath9x: Fix stack-out-of-bounds Write in ath9k_hif_usb_rx_cb ath9k: Fix use-after-free Write in ath9k_htc_rx_msg ath9k: Fix use-after-free Read in ath9k_wmi_ctrl_rx scsi: megaraid_sas: TM command refire leads to controller firmware crash KVM: arm64: Make vcpu_cp1x() work on Big Endian hosts KVM: MIPS: Fix VPN2_MASK definition for variable cpu_vmbits KVM: MIPS: Define KVM_ENTRYHI_ASID to cpu_asid_mask(&boot_cpu_data) KVM: nVMX: Consult only the "basic" exit reason when routing nested exit KVM: nSVM: leave ASID aside in copy_vmcb_control_area KVM: nSVM: fix condition for filtering async PF video: fbdev: w100fb: Fix a potential double free. proc: Use new_inode not new_inode_pseudo ovl: initialize error in ovl_copy_xattr selftests/net: in rxtimestamp getopt_long needs terminating null entry crypto: virtio: Fix dest length calculation in __virtio_crypto_skcipher_do_req() crypto: virtio: Fix src/dst scatterlist calculation in __virtio_crypto_skcipher_do_req() crypto: virtio: Fix use-after-free in virtio_crypto_skcipher_finalize_req() spi: pxa2xx: Fix runtime PM ref imbalance on probe error spi: pxa2xx: Balance runtime PM enable/disable on error spi: bcm2835: Fix controller unregister order spi: pxa2xx: Fix controller unregister order spi: Fix controller unregister order spi: No need to assign dummy value in spi_unregister_controller() x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for indirect branches. 
x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS. x86/speculation: Add support for STIBP always-on preferred mode x86/speculation: Change misspelled STIPB to STIBP KVM: x86: only do L1TF workaround on affected processors KVM: x86/mmu: Consolidate "is MMIO SPTE" code kvm: x86: Fix L1TF mitigation for shadow MMU KVM: x86: Fix APIC page invalidation race x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned ALSA: pcm: disallow linking stream to itself crypto: cavium/nitrox - Fix 'nitrox_get_first_device()' when ndevlist is fully iterated PM: runtime: clk: Fix clk_pm_runtime_get() error path spi: bcm-qspi: when tx/rx buffer is NULL set to 0 spi: bcm2835aux: Fix controller unregister order spi: dw: Fix controller unregister order nilfs2: fix null pointer dereference at nilfs_segctor_do_construct() cgroup, blkcg: Prepare some symbols for module and !CONFIG_CGROUP usages ACPI: PM: Avoid using power resources if there are none for D0 ACPI: GED: add support for _Exx / _Lxx handler methods ACPI: CPPC: Fix reference count leak in acpi_cppc_processor_probe() ACPI: sysfs: Fix reference count leak in acpi_sysfs_add_hotplug_profile() ALSA: usb-audio: Add vendor, product and profile name for HP Thunderbolt Dock ALSA: usb-audio: Fix inconsistent card PM state after resume ALSA: hda/realtek - add a pintbl quirk for several Lenovo machines ALSA: es1688: Add the missed snd_card_free() efi/efivars: Add missing kobject_put() in sysfs entry creation error path x86/reboot/quirks: Add MacBook6,1 reboot quirk x86/speculation: Prevent rogue cross-process SSBD shutdown x86/PCI: Mark Intel C620 MROMs as having non-compliant BARs x86_64: Fix jiffies ODR violation btrfs: tree-checker: Check level for leaves and nodes aio: fix async fsync creds mm: add kvfree_sensitive() for freeing sensitive data objects perf probe: Accept the instance number of kretprobe event x86/cpu/amd: Make erratum #1054 a legacy erratum RDMA/uverbs: Make the event_queue 
fds return POLLERR when disassociated ath9k_htc: Silence undersized packet warnings powerpc/xive: Clear the page tables for the ESB IO mapping drivers/net/ibmvnic: Update VNIC protocol version reporting Input: synaptics - add a second working PNP_ID for Lenovo T470s sched/fair: Don't NUMA balance for kthreads ARM: 8977/1: ptrace: Fix mask for thumb breakpoint hook Input: mms114 - fix handling of mms345l crypto: talitos - fix ECB and CBC algs ivsize btrfs: Detect unbalanced tree with empty leaf before crashing btree operations btrfs: merge btrfs_find_device and find_device lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user() x86: uaccess: Inhibit speculation past access_ok() in user_access_begin() arch/openrisc: Fix issues with access_ok() Fix 'acccess_ok()' on alpha and SH make 'user_access_begin()' do 'access_ok()' selftests: bpf: fix use of undeclared RET_IF macro tun: correct header offsets in napi frags mode vxlan: Avoid infinite loop when suppressing NS messages with invalid options bridge: Avoid infinite loop when suppressing NS messages with invalid options net_failover: fixed rollback in net_failover_open() ipv6: fix IPV6_ADDRFORM operation logic writeback: Drop I_DIRTY_TIME_EXPIRE writeback: Fix sync livelock due to b_dirty_time processing writeback: Avoid skipping inode writeback writeback: Protect inode->i_io_list with inode->i_lock Revert "writeback: Avoid skipping inode writeback" ANDROID: gki_defconfig: increase vbus_draw to 500mA fscrypt: remove stale definition fs-verity: remove unnecessary extern keywords fs-verity: fix all kerneldoc warnings fscrypt: add support for IV_INO_LBLK_32 policies fscrypt: make test_dummy_encryption use v2 by default fscrypt: support test_dummy_encryption=v2 fscrypt: add fscrypt_add_test_dummy_key() linux/parser.h: add include guards fscrypt: remove unnecessary extern keywords fscrypt: name all function parameters fscrypt: fix all kerneldoc warnings ANDROID: Update the ABI ANDROID: GKI: 
power: power-supply: Add POWER_SUPPLY_PROP_CHARGER_STATUS property ANDROID: GKI: add dev to usb_gsi_request ANDROID: GKI: dma-buf: add dent_count to dma_buf ANDROID: Update the ABI xml and whitelist ANDROID: GKI: update whitelist ANDROID: extcon: Export symbol of `extcon_get_edev_name` ANDROID: kbuild: merge more sections with LTO UPSTREAM: timekeeping/vsyscall: Update VDSO data unconditionally ANDROID: GKI: Revert "genetlink: disallow subscribing to unknown mcast groups" BACKPORT: usb: musb: Add support for MediaTek musb controller UPSTREAM: usb: musb: Add musb_clearb/w() interface UPSTREAM: usb: musb: Add noirq type of dma create interface UPSTREAM: usb: musb: Add get/set toggle hooks UPSTREAM: dt-bindings: usb: musb: Add support for MediaTek musb controller FROMGIT: driver core: Remove unnecessary is_fwnode_dev variable in device_add() FROMGIT: driver core: Remove check in driver_deferred_probe_force_trigger() FROMGIT: of: platform: Batch fwnode parsing when adding all top level devices FROMGIT: BACKPORT: driver core: fw_devlink: Add support for batching fwnode parsing BACKPORT: driver core: Look for waiting consumers only for a fwnode's primary device BACKPORT: driver core: Add device links from fwnode only for the primary device Linux 4.19.128 Revert "net/mlx5: Annotate mutex destroy for root ns" uprobes: ensure that uprobe->offset and ->ref_ctr_offset are properly aligned x86/speculation: Add Ivy Bridge to affected list x86/speculation: Add SRBDS vulnerability and mitigation documentation x86/speculation: Add Special Register Buffer Data Sampling (SRBDS) mitigation x86/cpu: Add 'table' argument to cpu_matches() x86/cpu: Add a steppings field to struct x86_cpu_id nvmem: qfprom: remove incorrect write support CDC-ACM: heed quirk also in error handling staging: rtl8712: Fix IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK tty: hvc_console, fix crashes on parallel open/close vt: keyboard: avoid signed integer overflow in k_ascii usb: musb: Fix runtime PM imbalance on error 
usb: musb: start session in resume for host port iio: vcnl4000: Fix i2c swapped word reading. USB: serial: option: add Telit LE910C1-EUX compositions USB: serial: usb_wwan: do not resubmit rx urb on fatal errors USB: serial: qcserial: add DW5816e QDL support net: check untrusted gso_size at kernel entry vsock: fix timeout in vsock_accept() NFC: st21nfca: add missed kfree_skb() in an error path net: usb: qmi_wwan: add Telit LE910C1-EUX composition l2tp: do not use inet_hash()/inet_unhash() l2tp: add sk_family checks to l2tp_validate_socket devinet: fix memleak in inetdev_init() Revert "ANDROID: Remove default y on BRIDGE_IGMP_SNOOPING" ANDROID: Update the ABI xml and whitelist ANDROID: GKI: update whitelist ANDROID: arch: arm64: vdso: export the symbols for time() ANDROID: Incremental fs: Remove dependency on PKCS7_MESSAGE_PARSER ANDROID: dm-bow: Add block_size option f2fs: attach IO flags to the missing cases f2fs: add node_io_flag for bio flags likewise data_io_flag f2fs: remove unused parameter of f2fs_put_rpages_mapping() f2fs: handle readonly filesystem in f2fs_ioc_shutdown() f2fs: avoid utf8_strncasecmp() with unstable name f2fs: don't return vmalloc() memory from f2fs_kmalloc() ANDROID: GKI: set CONFIG_BLK_DEV_LOOP_MIN_COUNT to 16 ANDROID: Incremental fs: Cache successful hash calculations ANDROID: Incremental fs: Fix four error-path bugs Linux 4.19.127 net: smsc911x: Fix runtime PM imbalance on error net: ethernet: stmmac: Enable interface clocks on probe for IPQ806x net/ethernet/freescale: rework quiesce/activate for ucc_geth null_blk: return error for invalid zone size s390/mm: fix set_huge_pte_at() for empty ptes drm/edid: Add Oculus Rift S to non-desktop list net: bmac: Fix read of MAC address from ROM x86/mmiotrace: Use cpumask_available() for cpumask_var_t variables i2c: altera: Fix race between xfer_msg and isr thread evm: Fix RCU list related warnings ARC: [plat-eznps]: Restrict to CONFIG_ISA_ARCOMPACT ARC: Fix ICCM & DCCM runtime size checks 
s390/ftrace: save traced function caller spi: dw: use "smp_mb()" to avoid sending spi data error powerpc/powernv: Avoid re-registration of imc debugfs directory scsi: hisi_sas: Check sas_port before using it drm/i915: fix port checks for MST support on gen >= 11 airo: Fix read overflows sending packets net: dsa: mt7530: set CPU port to fallback mode scsi: ufs: Release clock if DMA map fails mmc: fix compilation of user API kernel/relay.c: handle alloc_percpu returning NULL in relay_open p54usb: add AirVasT USB stick device-id HID: i2c-hid: add Schneider SCL142ALM to descriptor override HID: sony: Fix for broken buttons on DS3 USB dongles mm: Fix mremap not considering huge pmd devmap libnvdimm: Fix endian conversion issues Revert "cgroup: Add memory barriers to plug cgroup_rstat_updated() race window" f2fs: fix retry logic in f2fs_write_cache_pages() ANDROID: Update ABI representation Linux 4.19.126 mm/vmalloc.c: don't dereference possible NULL pointer in __vunmap() netfilter: nf_conntrack_pptp: fix compilation warning with W=1 build bonding: Fix reference count leak in bond_sysfs_slave_add. crypto: chelsio/chtls: properly set tp->lsndtime qlcnic: fix missing release in qlcnic_83xx_interrupt_test. xsk: Add overflow check for u64 division, stored into u32 bnxt_en: Fix accumulation of bp->net_stats_prev. esp6: get the right proto for transport mode in esp6_gso_encap netfilter: nf_conntrack_pptp: prevent buffer overflows in debug code netfilter: nfnetlink_cthelper: unbreak userspace helper support netfilter: ipset: Fix subcounter update skip netfilter: nft_reject_bridge: enable reject with bridge vlan ip_vti: receive ipip packet by calling ip_tunnel_rcv vti4: eliminated some duplicate code. 
xfrm: fix error in comment xfrm: fix a NULL-ptr deref in xfrm_local_error xfrm: fix a warning in xfrm_policy_insert_list xfrm interface: fix oops when deleting a x-netns interface xfrm: call xfrm_output_gso when inner_protocol is set in xfrm_output xfrm: allow to accept packets with ipv6 NEXTHDR_HOP in xfrm_input copy_xstate_to_kernel(): don't leave parts of destination uninitialized x86/dma: Fix max PFN arithmetic overflow on 32 bit systems mac80211: mesh: fix discovery timer re-arming issue / crash RDMA/core: Fix double destruction of uobject mmc: core: Fix recursive locking issue in CQE recovery path parisc: Fix kernel panic in mem_init() iommu: Fix reference count leak in iommu_group_alloc. include/asm-generic/topology.h: guard cpumask_of_node() macro argument fs/binfmt_elf.c: allocate initialized memory in fill_thread_core_info() mm: remove VM_BUG_ON(PageSlab()) from page_mapcount() IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode libceph: ignore pool overlay and cache logic on redirects ALSA: hda/realtek - Add new codec supported for ALC287 ALSA: usb-audio: Quirks for Gigabyte TRX40 Aorus Master onboard audio exec: Always set cap_ambient in cap_bprm_set_creds ALSA: usb-audio: mixer: volume quirk for ESS Technology Asus USB DAC ALSA: hda/realtek - Add a model for Thinkpad T570 without DAC workaround ALSA: hwdep: fix a left shifting 1 by 31 UB bug RDMA/pvrdma: Fix missing pci disable in pvrdma_pci_probe() mmc: block: Fix use-after-free issue for rpmb ARM: dts: bcm: HR2: Fix PPI interrupt types ARM: dts: bcm2835-rpi-zero-w: Fix led polarity ARM: dts/imx6q-bx50v3: Set display interface clock parents IB/qib: Call kobject_put() when kobject_init_and_add() fails gpio: exar: Fix bad handling for ida_simple_get error path ARM: uaccess: fix DACR mismatch with nested exceptions ARM: uaccess: integrate uaccess_save and uaccess_restore ARM: uaccess: consolidate uaccess asm to asm/uaccess-asm.h ARM: 8843/1: use unified assembler in headers ARM: 
8970/1: decompressor: increase tag size Input: synaptics-rmi4 - fix error return code in rmi_driver_probe() Input: synaptics-rmi4 - really fix attn_data use-after-free Input: i8042 - add ThinkPad S230u to i8042 reset list Input: dlink-dir685-touchkeys - fix a typo in driver name Input: xpad - add custom init packet for Xbox One S controllers Input: evdev - call input_flush_device() on release(), not flush() Input: usbtouchscreen - add support for BonXeon TP samples: bpf: Fix build error cifs: Fix null pointer check in cifs_read riscv: stacktrace: Fix undefined reference to `walk_stackframe' IB/i40iw: Remove bogus call to netdev_master_upper_dev_get() net: freescale: select CONFIG_FIXED_PHY where needed usb: gadget: legacy: fix redundant initialization warnings usb: dwc3: pci: Enable extcon driver for Intel Merrifield cachefiles: Fix race between read_waiter and read_copier involving op->to_do gfs2: move privileged user check to gfs2_quota_lock_check net: microchip: encx24j600: add missed kthread_stop ALSA: usb-audio: add mapping for ASRock TRX40 Creator gpio: tegra: mask GPIO IRQs during IRQ shutdown ARM: dts: rockchip: fix pinctrl sub nodename for spi in rk322x.dtsi ARM: dts: rockchip: swap clock-names of gpu nodes arm64: dts: rockchip: swap interrupts interrupt-names rk3399 gpu node arm64: dts: rockchip: fix status for &gmac2phy in rk3328-evb.dts ARM: dts: rockchip: fix phy nodename for rk3228-evb mlxsw: spectrum: Fix use-after-free of split/unsplit/type_set in case reload fails net/mlx4_core: fix a memory leak bug. net: sun: fix missing release regions in cas_init_one(). 
net/mlx5: Annotate mutex destroy for root ns net/mlx5e: Update netdev txq on completions during closure sctp: Start shutdown on association restart if in SHUTDOWN-SENT state and socket is closed sctp: Don't add the shutdown timer if its already been added r8152: support additional Microsoft Surface Ethernet Adapter variant net sched: fix reporting the first-time use timestamp net: revert "net: get rid of an signed integer overflow in ip_idents_reserve()" net: qrtr: Fix passing invalid reference to qrtr_local_enqueue() net/mlx5: Add command entry handling completion net: ipip: fix wrong address family in init error path net: inet_csk: Fix so_reuseport bind-address cache in tb->fast* __netif_receive_skb_core: pass skb by reference net: dsa: mt7530: fix roaming from DSA user ports dpaa_eth: fix usage as DSA master, try 3 ax25: fix setsockopt(SO_BINDTODEVICE) ANDROID: modules: fix lockprove warning FROMGIT: USB: dummy-hcd: use configurable endpoint naming scheme UPSTREAM: usb: raw-gadget: fix null-ptr-deref when reenabling endpoints UPSTREAM: usb: raw-gadget: documentation updates UPSTREAM: usb: raw-gadget: support stalling/halting/wedging endpoints UPSTREAM: usb: raw-gadget: fix gadget endpoint selection UPSTREAM: usb: raw-gadget: improve uapi headers comments UPSTREAM: usb: raw-gadget: fix return value of ep read ioctls UPSTREAM: usb: raw-gadget: fix raw_event_queue_fetch locking UPSTREAM: usb: raw-gadget: Fix copy_to/from_user() checks f2fs: fix wrong discard space f2fs: compress: don't compress any datas after cp stop f2fs: remove unneeded return value of __insert_discard_tree() f2fs: fix wrong value of tracepoint parameter f2fs: protect new segment allocation in expand_inode_data f2fs: code cleanup by removing ifdef macro surrounding writeback: Avoid skipping inode writeback ANDROID: GKI: Update the ABI ANDROID: GKI: update whitelist ANDROID: GKI: support mm_event for FS/IO/UFS path ANDROID: net: bpf: permit redirect from ingress L3 to egress L2 devices at near 
max mtu FROMGIT: driver core: Update device link status correctly for SYNC_STATE_ONLY links UPSTREAM: driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links BACKPORT: driver core: Fix SYNC_STATE_ONLY device link implementation ANDROID: Bulk update the ABI xml and qcom whitelist Revert "ANDROID: Incremental fs: Avoid continually recalculating hashes" f2fs: avoid inifinite loop to wait for flushing node pages at cp_error f2fs: compress: fix zstd data corruption f2fs: add compressed/gc data read IO stat f2fs: fix potential use-after-free issue f2fs: compress: don't handle non-compressed data in workqueue f2fs: remove redundant assignment to variable err f2fs: refactor resize_fs to avoid meta updates in progress f2fs: use round_up to enhance calculation f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS f2fs: Avoid double lock for cp_rwsem during checkpoint f2fs: report delalloc reserve as non-free in statfs for project quota f2fs: Fix wrong stub helper update_sit_info f2fs: compress: let lz4 compressor handle output buffer budget properly f2fs: remove blk_plugging in block_operations f2fs: introduce F2FS_IOC_RELEASE_COMPRESS_BLOCKS f2fs: shrink spinlock coverage f2fs: correctly fix the parent inode number during fsync() f2fs: introduce mempool for {,de}compress intermediate page allocation f2fs: introduce f2fs_bmap_compress() f2fs: support fiemap on compressed inode f2fs: support partial truncation on compressed inode f2fs: remove redundant compress inode check f2fs: use strcmp() in parse_options() f2fs: Use the correct style for SPDX License Identifier Conflicts: Documentation/devicetree/bindings Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt Documentation/devicetree/bindings/usb/dwc3.txt drivers/media/v4l2-core/v4l2-ctrls.c drivers/mmc/core/queue.c drivers/mmc/host/sdhci-msm.c drivers/scsi/ufs/ufs-qcom.c drivers/slimbus/qcom-ngd-ctrl.c drivers/usb/gadget/composite.c fs/crypto/keyring.c fs/f2fs/data.c include/linux/fs.h 
include/linux/usb/gadget.h include/uapi/linux/v4l2-controls.h kernel/sched/cpufreq_schedutil.c kernel/sched/fair.c kernel/time/tick-sched.c mm/vmalloc.c net/netlink/genetlink.c net/qrtr/qrtr.c sound/core/compress_offload.c sound/soc/soc-compress.c Fixed errors: drivers/scsi/ufs/ufshcd.c drivers/soc/qcom/rq_stats.c Change-Id: I06ea6a6c3f239045e2947f27af617aa6f523bfdb Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2646 lines
69 KiB
C
2646 lines
69 KiB
C
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Implementation of the Transmission Control Protocol(TCP).
|
|
*
|
|
* IPv4 specific functions
|
|
*
|
|
*
|
|
* code split from:
|
|
* linux/ipv4/tcp.c
|
|
* linux/ipv4/tcp_input.c
|
|
* linux/ipv4/tcp_output.c
|
|
*
|
|
* See tcp.c for author information
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
/*
|
|
* Changes:
|
|
* David S. Miller : New socket lookup architecture.
|
|
* This code is dedicated to John Dyson.
|
|
* David S. Miller : Change semantics of established hash,
|
|
* half is devoted to TIME_WAIT sockets
|
|
* and the rest go in the other half.
|
|
* Andi Kleen : Add support for syncookies and fixed
|
|
* some bugs: ip options weren't passed to
|
|
* the TCP layer, missed a check for an
|
|
* ACK bit.
|
|
* Andi Kleen : Implemented fast path mtu discovery.
|
|
* Fixed many serious bugs in the
|
|
* request_sock handling and moved
|
|
* most of it into the af independent code.
|
|
* Added tail drop and some other bugfixes.
|
|
* Added new listen semantics.
|
|
* Mike McLagan : Routing by source
|
|
* Juan Jose Ciarlante: ip_dynaddr bits
|
|
* Andi Kleen: various fixes.
|
|
* Vitaly E. Lavrov : Transparent proxy revived after year
|
|
* coma.
|
|
* Andi Kleen : Fix new listen.
|
|
* Andi Kleen : Fix accept error reporting.
|
|
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
|
|
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
|
|
* a single port at the same time.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "TCP: " fmt
|
|
|
|
#include <linux/bottom_half.h>
|
|
#include <linux/types.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/module.h>
|
|
#include <linux/random.h>
|
|
#include <linux/cache.h>
|
|
#include <linux/jhash.h>
|
|
#include <linux/init.h>
|
|
#include <linux/times.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <net/net_namespace.h>
|
|
#include <net/icmp.h>
|
|
#include <net/inet_hashtables.h>
|
|
#include <net/tcp.h>
|
|
#include <net/transp_v6.h>
|
|
#include <net/ipv6.h>
|
|
#include <net/inet_common.h>
|
|
#include <net/timewait_sock.h>
|
|
#include <net/xfrm.h>
|
|
#include <net/secure_seq.h>
|
|
#include <net/busy_poll.h>
|
|
|
|
#include <linux/inet.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/inetdevice.h>
|
|
|
|
#include <crypto/hash.h>
|
|
#include <linux/scatterlist.h>
|
|
|
|
#include <trace/events/tcp.h>
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
|
|
__be32 daddr, __be32 saddr, const struct tcphdr *th);
|
|
#endif
|
|
|
|
struct inet_hashinfo tcp_hashinfo;
|
|
EXPORT_SYMBOL(tcp_hashinfo);
|
|
|
|
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
|
|
{
|
|
return secure_tcp_seq(ip_hdr(skb)->daddr,
|
|
ip_hdr(skb)->saddr,
|
|
tcp_hdr(skb)->dest,
|
|
tcp_hdr(skb)->source);
|
|
}
|
|
|
|
static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
|
|
{
|
|
return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
|
|
}
|
|
|
|
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
|
|
{
|
|
const struct inet_timewait_sock *tw = inet_twsk(sktw);
|
|
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
|
|
|
|
if (reuse == 2) {
|
|
/* Still does not detect *everything* that goes through
|
|
* lo, since we require a loopback src or dst address
|
|
* or direct binding to 'lo' interface.
|
|
*/
|
|
bool loopback = false;
|
|
if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
|
|
loopback = true;
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
if (tw->tw_family == AF_INET6) {
|
|
if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
|
|
(ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
|
|
(tw->tw_v6_daddr.s6_addr[12] == 127)) ||
|
|
ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
|
|
(ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
|
|
(tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
|
|
loopback = true;
|
|
} else
|
|
#endif
|
|
{
|
|
if (ipv4_is_loopback(tw->tw_daddr) ||
|
|
ipv4_is_loopback(tw->tw_rcv_saddr))
|
|
loopback = true;
|
|
}
|
|
if (!loopback)
|
|
reuse = 0;
|
|
}
|
|
|
|
/* With PAWS, it is safe from the viewpoint
|
|
of data integrity. Even without PAWS it is safe provided sequence
|
|
spaces do not overlap i.e. at data rates <= 80Mbit/sec.
|
|
|
|
Actually, the idea is close to VJ's one, only timestamp cache is
|
|
held not per host, but per port pair and TW bucket is used as state
|
|
holder.
|
|
|
|
If TW bucket has been already destroyed we fall back to VJ's scheme
|
|
and use initial timestamp retrieved from peer table.
|
|
*/
|
|
if (tcptw->tw_ts_recent_stamp &&
|
|
(!twp || (reuse && time_after32(ktime_get_seconds(),
|
|
tcptw->tw_ts_recent_stamp)))) {
|
|
/* In case of repair and re-using TIME-WAIT sockets we still
|
|
* want to be sure that it is safe as above but honor the
|
|
* sequence numbers and time stamps set as part of the repair
|
|
* process.
|
|
*
|
|
* Without this check re-using a TIME-WAIT socket with TCP
|
|
* repair would accumulate a -1 on the repair assigned
|
|
* sequence number. The first time it is reused the sequence
|
|
* is -1, the second time -2, etc. This fixes that issue
|
|
* without appearing to create any others.
|
|
*/
|
|
if (likely(!tp->repair)) {
|
|
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
|
|
if (tp->write_seq == 0)
|
|
tp->write_seq = 1;
|
|
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
|
|
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
|
|
}
|
|
sock_hold(sktw);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
|
|
|
|
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
|
|
int addr_len)
|
|
{
|
|
/* This check is replicated from tcp_v4_connect() and intended to
|
|
* prevent BPF program called below from accessing bytes that are out
|
|
* of the bound specified by user in addr_len.
|
|
*/
|
|
if (addr_len < sizeof(struct sockaddr_in))
|
|
return -EINVAL;
|
|
|
|
sock_owned_by_me(sk);
|
|
|
|
return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
|
|
}
|
|
|
|
/* This will initiate an outgoing connection. */
|
|
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|
{
|
|
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
__be16 orig_sport, orig_dport;
|
|
__be32 daddr, nexthop;
|
|
struct flowi4 *fl4;
|
|
struct rtable *rt;
|
|
int err;
|
|
struct ip_options_rcu *inet_opt;
|
|
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
|
|
|
if (addr_len < sizeof(struct sockaddr_in))
|
|
return -EINVAL;
|
|
|
|
if (usin->sin_family != AF_INET)
|
|
return -EAFNOSUPPORT;
|
|
|
|
nexthop = daddr = usin->sin_addr.s_addr;
|
|
inet_opt = rcu_dereference_protected(inet->inet_opt,
|
|
lockdep_sock_is_held(sk));
|
|
if (inet_opt && inet_opt->opt.srr) {
|
|
if (!daddr)
|
|
return -EINVAL;
|
|
nexthop = inet_opt->opt.faddr;
|
|
}
|
|
|
|
orig_sport = inet->inet_sport;
|
|
orig_dport = usin->sin_port;
|
|
fl4 = &inet->cork.fl.u.ip4;
|
|
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
|
|
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
|
|
IPPROTO_TCP,
|
|
orig_sport, orig_dport, sk);
|
|
if (IS_ERR(rt)) {
|
|
err = PTR_ERR(rt);
|
|
if (err == -ENETUNREACH)
|
|
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
|
|
return err;
|
|
}
|
|
|
|
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
|
|
ip_rt_put(rt);
|
|
return -ENETUNREACH;
|
|
}
|
|
|
|
if (!inet_opt || !inet_opt->opt.srr)
|
|
daddr = fl4->daddr;
|
|
|
|
if (!inet->inet_saddr)
|
|
inet->inet_saddr = fl4->saddr;
|
|
sk_rcv_saddr_set(sk, inet->inet_saddr);
|
|
|
|
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
|
|
/* Reset inherited state */
|
|
tp->rx_opt.ts_recent = 0;
|
|
tp->rx_opt.ts_recent_stamp = 0;
|
|
if (likely(!tp->repair))
|
|
tp->write_seq = 0;
|
|
}
|
|
|
|
inet->inet_dport = usin->sin_port;
|
|
sk_daddr_set(sk, daddr);
|
|
|
|
inet_csk(sk)->icsk_ext_hdr_len = 0;
|
|
if (inet_opt)
|
|
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
|
|
|
|
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
|
|
|
|
/* Socket identity is still unknown (sport may be zero).
|
|
* However we set state to SYN-SENT and not releasing socket
|
|
* lock select source port, enter ourselves into the hash tables and
|
|
* complete initialization after this.
|
|
*/
|
|
tcp_set_state(sk, TCP_SYN_SENT);
|
|
err = inet_hash_connect(tcp_death_row, sk);
|
|
if (err)
|
|
goto failure;
|
|
|
|
sk_set_txhash(sk);
|
|
|
|
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
|
|
inet->inet_sport, inet->inet_dport, sk);
|
|
if (IS_ERR(rt)) {
|
|
err = PTR_ERR(rt);
|
|
rt = NULL;
|
|
goto failure;
|
|
}
|
|
/* OK, now commit destination to socket. */
|
|
sk->sk_gso_type = SKB_GSO_TCPV4;
|
|
sk_setup_caps(sk, &rt->dst);
|
|
rt = NULL;
|
|
|
|
if (likely(!tp->repair)) {
|
|
if (!tp->write_seq)
|
|
tp->write_seq = secure_tcp_seq(inet->inet_saddr,
|
|
inet->inet_daddr,
|
|
inet->inet_sport,
|
|
usin->sin_port);
|
|
tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
|
|
inet->inet_saddr,
|
|
inet->inet_daddr);
|
|
}
|
|
|
|
inet->inet_id = prandom_u32();
|
|
|
|
if (tcp_fastopen_defer_connect(sk, &err))
|
|
return err;
|
|
if (err)
|
|
goto failure;
|
|
|
|
err = tcp_connect(sk);
|
|
|
|
if (err)
|
|
goto failure;
|
|
|
|
return 0;
|
|
|
|
failure:
|
|
/*
|
|
* This unhashes the socket and releases the local port,
|
|
* if necessary.
|
|
*/
|
|
tcp_set_state(sk, TCP_CLOSE);
|
|
ip_rt_put(rt);
|
|
sk->sk_route_caps = 0;
|
|
inet->inet_dport = 0;
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_connect);
|
|
|
|
/*
|
|
* This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
|
|
* It can be called through tcp_release_cb() if socket was owned by user
|
|
* at the time tcp_v4_err() was called to handle ICMP message.
|
|
*/
|
|
void tcp_v4_mtu_reduced(struct sock *sk)
|
|
{
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
struct dst_entry *dst;
|
|
u32 mtu;
|
|
|
|
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
|
|
return;
|
|
mtu = tcp_sk(sk)->mtu_info;
|
|
dst = inet_csk_update_pmtu(sk, mtu);
|
|
if (!dst)
|
|
return;
|
|
|
|
/* Something is about to be wrong... Remember soft error
|
|
* for the case, if this connection will not able to recover.
|
|
*/
|
|
if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
|
|
sk->sk_err_soft = EMSGSIZE;
|
|
|
|
mtu = dst_mtu(dst);
|
|
|
|
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
|
|
ip_sk_accept_pmtu(sk) &&
|
|
inet_csk(sk)->icsk_pmtu_cookie > mtu) {
|
|
tcp_sync_mss(sk, mtu);
|
|
|
|
/* Resend the TCP packet because it's
|
|
* clear that the old packet has been
|
|
* dropped. This is the new "fast" path mtu
|
|
* discovery.
|
|
*/
|
|
tcp_simple_retransmit(sk);
|
|
} /* else let the usual retransmit timer handle it */
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
|
|
|
|
static void do_redirect(struct sk_buff *skb, struct sock *sk)
|
|
{
|
|
struct dst_entry *dst = __sk_dst_check(sk, 0);
|
|
|
|
if (dst)
|
|
dst->ops->redirect(dst, sk, skb);
|
|
}
|
|
|
|
|
|
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
|
|
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
|
|
{
|
|
struct request_sock *req = inet_reqsk(sk);
|
|
struct net *net = sock_net(sk);
|
|
|
|
/* ICMPs are not backlogged, hence we cannot get
|
|
* an established socket here.
|
|
*/
|
|
if (seq != tcp_rsk(req)->snt_isn) {
|
|
__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
|
|
} else if (abort) {
|
|
/*
|
|
* Still in SYN_RECV, just remove it silently.
|
|
* There is no good way to pass the error to the newly
|
|
* created socket, and POSIX does not want network
|
|
* errors returned from accept().
|
|
*/
|
|
inet_csk_reqsk_queue_drop(req->rsk_listener, req);
|
|
tcp_listendrop(req->rsk_listener);
|
|
}
|
|
reqsk_put(req);
|
|
}
|
|
EXPORT_SYMBOL(tcp_req_err);
|
|
|
|
/*
|
|
* This routine is called by the ICMP module when it gets some
|
|
* sort of error condition. If err < 0 then the socket should
|
|
* be closed and the error returned to the user. If err > 0
|
|
* it's just the icmp type << 8 | icmp code. After adjustment
|
|
* header points to the first 8 bytes of the tcp header. We need
|
|
* to find the appropriate port.
|
|
*
|
|
* The locking strategy used here is very "optimistic". When
|
|
* someone else accesses the socket the ICMP is just dropped
|
|
* and for some paths there is no check at all.
|
|
* A more general error queue to queue errors for later handling
|
|
* is probably better.
|
|
*
|
|
*/
|
|
|
|
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
|
|
{
|
|
const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
|
|
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
|
|
struct inet_connection_sock *icsk;
|
|
struct tcp_sock *tp;
|
|
struct inet_sock *inet;
|
|
const int type = icmp_hdr(icmp_skb)->type;
|
|
const int code = icmp_hdr(icmp_skb)->code;
|
|
struct sock *sk;
|
|
struct sk_buff *skb;
|
|
struct request_sock *fastopen;
|
|
u32 seq, snd_una;
|
|
s32 remaining;
|
|
u32 delta_us;
|
|
int err;
|
|
struct net *net = dev_net(icmp_skb->dev);
|
|
|
|
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
|
|
th->dest, iph->saddr, ntohs(th->source),
|
|
inet_iif(icmp_skb), 0);
|
|
if (!sk) {
|
|
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
|
|
return;
|
|
}
|
|
if (sk->sk_state == TCP_TIME_WAIT) {
|
|
inet_twsk_put(inet_twsk(sk));
|
|
return;
|
|
}
|
|
seq = ntohl(th->seq);
|
|
if (sk->sk_state == TCP_NEW_SYN_RECV)
|
|
return tcp_req_err(sk, seq,
|
|
type == ICMP_PARAMETERPROB ||
|
|
type == ICMP_TIME_EXCEEDED ||
|
|
(type == ICMP_DEST_UNREACH &&
|
|
(code == ICMP_NET_UNREACH ||
|
|
code == ICMP_HOST_UNREACH)));
|
|
|
|
bh_lock_sock(sk);
|
|
/* If too many ICMPs get dropped on busy
|
|
* servers this needs to be solved differently.
|
|
* We do take care of PMTU discovery (RFC1191) special case :
|
|
* we can receive locally generated ICMP messages while socket is held.
|
|
*/
|
|
if (sock_owned_by_user(sk)) {
|
|
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
|
|
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
|
|
}
|
|
if (sk->sk_state == TCP_CLOSE)
|
|
goto out;
|
|
|
|
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
|
|
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
|
|
goto out;
|
|
}
|
|
|
|
icsk = inet_csk(sk);
|
|
tp = tcp_sk(sk);
|
|
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
|
|
fastopen = tp->fastopen_rsk;
|
|
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
|
|
if (sk->sk_state != TCP_LISTEN &&
|
|
!between(seq, snd_una, tp->snd_nxt)) {
|
|
__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
|
|
goto out;
|
|
}
|
|
|
|
switch (type) {
|
|
case ICMP_REDIRECT:
|
|
if (!sock_owned_by_user(sk))
|
|
do_redirect(icmp_skb, sk);
|
|
goto out;
|
|
case ICMP_SOURCE_QUENCH:
|
|
/* Just silently ignore these. */
|
|
goto out;
|
|
case ICMP_PARAMETERPROB:
|
|
err = EPROTO;
|
|
break;
|
|
case ICMP_DEST_UNREACH:
|
|
if (code > NR_ICMP_UNREACH)
|
|
goto out;
|
|
|
|
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
|
|
/* We are not interested in TCP_LISTEN and open_requests
|
|
* (SYN-ACKs send out by Linux are always <576bytes so
|
|
* they should go through unfragmented).
|
|
*/
|
|
if (sk->sk_state == TCP_LISTEN)
|
|
goto out;
|
|
|
|
tp->mtu_info = info;
|
|
if (!sock_owned_by_user(sk)) {
|
|
tcp_v4_mtu_reduced(sk);
|
|
} else {
|
|
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
|
|
sock_hold(sk);
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
err = icmp_err_convert[code].errno;
|
|
/* check if icmp_skb allows revert of backoff
|
|
* (see draft-zimmermann-tcp-lcd) */
|
|
if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
|
|
break;
|
|
if (seq != tp->snd_una || !icsk->icsk_retransmits ||
|
|
!icsk->icsk_backoff || fastopen)
|
|
break;
|
|
|
|
if (sock_owned_by_user(sk))
|
|
break;
|
|
|
|
skb = tcp_rtx_queue_head(sk);
|
|
if (WARN_ON_ONCE(!skb))
|
|
break;
|
|
|
|
icsk->icsk_backoff--;
|
|
icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
|
|
TCP_TIMEOUT_INIT;
|
|
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
|
|
|
|
tcp_mstamp_refresh(tp);
|
|
delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
|
|
remaining = icsk->icsk_rto -
|
|
usecs_to_jiffies(delta_us);
|
|
|
|
if (remaining > 0) {
|
|
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
|
|
remaining, TCP_RTO_MAX);
|
|
} else {
|
|
/* RTO revert clocked out retransmission.
|
|
* Will retransmit now */
|
|
tcp_retransmit_timer(sk);
|
|
}
|
|
|
|
break;
|
|
case ICMP_TIME_EXCEEDED:
|
|
err = EHOSTUNREACH;
|
|
break;
|
|
default:
|
|
goto out;
|
|
}
|
|
|
|
switch (sk->sk_state) {
|
|
case TCP_SYN_SENT:
|
|
case TCP_SYN_RECV:
|
|
/* Only in fast or simultaneous open. If a fast open socket is
|
|
* is already accepted it is treated as a connected one below.
|
|
*/
|
|
if (fastopen && !fastopen->sk)
|
|
break;
|
|
|
|
if (!sock_owned_by_user(sk)) {
|
|
sk->sk_err = err;
|
|
|
|
sk->sk_error_report(sk);
|
|
|
|
tcp_done(sk);
|
|
} else {
|
|
sk->sk_err_soft = err;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/* If we've already connected we will keep trying
|
|
* until we time out, or the user gives up.
|
|
*
|
|
* rfc1122 4.2.3.9 allows to consider as hard errors
|
|
* only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
|
|
* but it is obsoleted by pmtu discovery).
|
|
*
|
|
* Note, that in modern internet, where routing is unreliable
|
|
* and in each dark corner broken firewalls sit, sending random
|
|
* errors ordered by their masters even this two messages finally lose
|
|
* their original sense (even Linux sends invalid PORT_UNREACHs)
|
|
*
|
|
* Now we are in compliance with RFCs.
|
|
* --ANK (980905)
|
|
*/
|
|
|
|
inet = inet_sk(sk);
|
|
if (!sock_owned_by_user(sk) && inet->recverr) {
|
|
sk->sk_err = err;
|
|
sk->sk_error_report(sk);
|
|
} else { /* Only an error on timeout */
|
|
sk->sk_err_soft = err;
|
|
}
|
|
|
|
out:
|
|
bh_unlock_sock(sk);
|
|
sock_put(sk);
|
|
}
|
|
|
|
/* Store the complemented tcp_v4_check() value for @saddr/@daddr and skb->len
 * in the TCP checksum field, and record in @skb where the checksum starts
 * and at which offset the checksum field lives.
 */
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *tcph = tcp_hdr(skb);

	tcph->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}
|
|
|
|
/* This routine computes an IPv4 TCP checksum. */
|
|
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
|
|
__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_send_check);
|
|
|
|
/*
|
|
* This routine will send an RST to the other tcp.
|
|
*
|
|
* Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
|
|
* for reset.
|
|
* Answer: if a packet caused RST, it is not for a socket
|
|
* existing in our system, if it is matched to a socket,
|
|
* it is just duplicate segment or bug in other side's TCP.
|
|
* So that we build reply only basing on parameters
|
|
* arrived with segment.
|
|
* Exception: precedence violation. We do not implement it in any case.
|
|
*/
|
|
|
|
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
const struct tcphdr *th = tcp_hdr(skb);
|
|
struct {
|
|
struct tcphdr th;
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
|
|
#endif
|
|
} rep;
|
|
struct ip_reply_arg arg;
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
struct tcp_md5sig_key *key = NULL;
|
|
const __u8 *hash_location = NULL;
|
|
unsigned char newhash[16];
|
|
int genhash;
|
|
struct sock *sk1 = NULL;
|
|
#endif
|
|
struct net *net;
|
|
struct sock *ctl_sk;
|
|
|
|
/* Never send a reset in response to a reset. */
|
|
if (th->rst)
|
|
return;
|
|
|
|
/* If sk not NULL, it means we did a successful lookup and incoming
|
|
* route had to be correct. prequeue might have dropped our dst.
|
|
*/
|
|
if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
|
|
return;
|
|
|
|
/* Swap the send and the receive. */
|
|
memset(&rep, 0, sizeof(rep));
|
|
rep.th.dest = th->source;
|
|
rep.th.source = th->dest;
|
|
rep.th.doff = sizeof(struct tcphdr) / 4;
|
|
rep.th.rst = 1;
|
|
|
|
if (th->ack) {
|
|
rep.th.seq = th->ack_seq;
|
|
} else {
|
|
rep.th.ack = 1;
|
|
rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
|
|
skb->len - (th->doff << 2));
|
|
}
|
|
|
|
memset(&arg, 0, sizeof(arg));
|
|
arg.iov[0].iov_base = (unsigned char *)&rep;
|
|
arg.iov[0].iov_len = sizeof(rep.th);
|
|
|
|
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
rcu_read_lock();
|
|
hash_location = tcp_parse_md5sig_option(th);
|
|
if (sk && sk_fullsock(sk)) {
|
|
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
|
|
&ip_hdr(skb)->saddr, AF_INET);
|
|
} else if (hash_location) {
|
|
/*
|
|
* active side is lost. Try to find listening socket through
|
|
* source port, and then find md5 key through listening socket.
|
|
* we are not loose security here:
|
|
* Incoming packet is checked with md5 hash with finding key,
|
|
* no RST generated if md5 hash doesn't match.
|
|
*/
|
|
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
|
|
ip_hdr(skb)->saddr,
|
|
th->source, ip_hdr(skb)->daddr,
|
|
ntohs(th->source), inet_iif(skb),
|
|
tcp_v4_sdif(skb));
|
|
/* don't send rst if it can't find key */
|
|
if (!sk1)
|
|
goto out;
|
|
|
|
key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
|
|
&ip_hdr(skb)->saddr, AF_INET);
|
|
if (!key)
|
|
goto out;
|
|
|
|
|
|
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
|
|
if (genhash || memcmp(hash_location, newhash, 16) != 0)
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (key) {
|
|
rep.opt[0] = htonl((TCPOPT_NOP << 24) |
|
|
(TCPOPT_NOP << 16) |
|
|
(TCPOPT_MD5SIG << 8) |
|
|
TCPOLEN_MD5SIG);
|
|
/* Update length and the length the header thinks exists */
|
|
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
|
|
rep.th.doff = arg.iov[0].iov_len / 4;
|
|
|
|
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
|
|
key, ip_hdr(skb)->saddr,
|
|
ip_hdr(skb)->daddr, &rep.th);
|
|
}
|
|
#endif
|
|
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
|
|
ip_hdr(skb)->saddr, /* XXX */
|
|
arg.iov[0].iov_len, IPPROTO_TCP, 0);
|
|
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
|
|
arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
|
|
|
|
/* When socket is gone, all binding information is lost.
|
|
* routing might fail in this case. No choice here, if we choose to force
|
|
* input interface, we will misroute in case of asymmetric route.
|
|
*/
|
|
if (sk) {
|
|
arg.bound_dev_if = sk->sk_bound_dev_if;
|
|
if (sk_fullsock(sk))
|
|
trace_tcp_send_reset(sk, skb);
|
|
}
|
|
|
|
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
|
|
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
|
|
|
|
arg.tos = ip_hdr(skb)->tos;
|
|
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
|
|
local_bh_disable();
|
|
ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
|
|
if (sk)
|
|
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
|
|
inet_twsk(sk)->tw_mark : sk->sk_mark;
|
|
ip_send_unicast_reply(ctl_sk,
|
|
skb, &TCP_SKB_CB(skb)->header.h4.opt,
|
|
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
|
|
&arg, arg.iov[0].iov_len);
|
|
|
|
ctl_sk->sk_mark = 0;
|
|
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
|
|
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
|
|
local_bh_enable();
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
out:
|
|
rcu_read_unlock();
|
|
#endif
|
|
}
|
|
|
|
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
|
|
outside socket context is ugly, certainly. What can I do?
|
|
*/
|
|
|
|
static void tcp_v4_send_ack(const struct sock *sk,
|
|
struct sk_buff *skb, u32 seq, u32 ack,
|
|
u32 win, u32 tsval, u32 tsecr, int oif,
|
|
struct tcp_md5sig_key *key,
|
|
int reply_flags, u8 tos)
|
|
{
|
|
const struct tcphdr *th = tcp_hdr(skb);
|
|
struct {
|
|
struct tcphdr th;
|
|
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
|
|
#endif
|
|
];
|
|
} rep;
|
|
struct net *net = sock_net(sk);
|
|
struct ip_reply_arg arg;
|
|
struct sock *ctl_sk;
|
|
|
|
memset(&rep.th, 0, sizeof(struct tcphdr));
|
|
memset(&arg, 0, sizeof(arg));
|
|
|
|
arg.iov[0].iov_base = (unsigned char *)&rep;
|
|
arg.iov[0].iov_len = sizeof(rep.th);
|
|
if (tsecr) {
|
|
rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
|
|
(TCPOPT_TIMESTAMP << 8) |
|
|
TCPOLEN_TIMESTAMP);
|
|
rep.opt[1] = htonl(tsval);
|
|
rep.opt[2] = htonl(tsecr);
|
|
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
|
|
}
|
|
|
|
/* Swap the send and the receive. */
|
|
rep.th.dest = th->source;
|
|
rep.th.source = th->dest;
|
|
rep.th.doff = arg.iov[0].iov_len / 4;
|
|
rep.th.seq = htonl(seq);
|
|
rep.th.ack_seq = htonl(ack);
|
|
rep.th.ack = 1;
|
|
rep.th.window = htons(win);
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
if (key) {
|
|
int offset = (tsecr) ? 3 : 0;
|
|
|
|
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
|
|
(TCPOPT_NOP << 16) |
|
|
(TCPOPT_MD5SIG << 8) |
|
|
TCPOLEN_MD5SIG);
|
|
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
|
|
rep.th.doff = arg.iov[0].iov_len/4;
|
|
|
|
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
|
|
key, ip_hdr(skb)->saddr,
|
|
ip_hdr(skb)->daddr, &rep.th);
|
|
}
|
|
#endif
|
|
arg.flags = reply_flags;
|
|
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
|
|
ip_hdr(skb)->saddr, /* XXX */
|
|
arg.iov[0].iov_len, IPPROTO_TCP, 0);
|
|
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
|
|
if (oif)
|
|
arg.bound_dev_if = oif;
|
|
arg.tos = tos;
|
|
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
|
|
local_bh_disable();
|
|
ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
|
|
if (sk)
|
|
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
|
|
inet_twsk(sk)->tw_mark : sk->sk_mark;
|
|
ip_send_unicast_reply(ctl_sk,
|
|
skb, &TCP_SKB_CB(skb)->header.h4.opt,
|
|
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
|
|
&arg, arg.iov[0].iov_len);
|
|
|
|
ctl_sk->sk_mark = 0;
|
|
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
|
|
local_bh_enable();
|
|
}
|
|
|
|
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct inet_timewait_sock *tw = inet_twsk(sk);
|
|
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
|
|
|
tcp_v4_send_ack(sk, skb,
|
|
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
|
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
|
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
|
tcptw->tw_ts_recent,
|
|
tw->tw_bound_dev_if,
|
|
tcp_twsk_md5_key(tcptw),
|
|
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
|
tw->tw_tos
|
|
);
|
|
|
|
inet_twsk_put(tw);
|
|
}
|
|
|
|
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
|
struct request_sock *req)
|
|
{
|
|
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
|
|
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
|
|
*/
|
|
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
|
|
tcp_sk(sk)->snd_nxt;
|
|
|
|
/* RFC 7323 2.3
|
|
* The window field (SEG.WND) of every outgoing segment, with the
|
|
* exception of <SYN> segments, MUST be right-shifted by
|
|
* Rcv.Wind.Shift bits:
|
|
*/
|
|
tcp_v4_send_ack(sk, skb, seq,
|
|
tcp_rsk(req)->rcv_nxt,
|
|
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
|
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
|
req->ts_recent,
|
|
0,
|
|
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
|
|
AF_INET),
|
|
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
|
ip_hdr(skb)->tos);
|
|
}
|
|
|
|
/*
|
|
* Send a SYN-ACK after having received a SYN.
|
|
* This still operates on a request_sock only, not on a big
|
|
* socket.
|
|
*/
|
|
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
|
|
struct flowi *fl,
|
|
struct request_sock *req,
|
|
struct tcp_fastopen_cookie *foc,
|
|
enum tcp_synack_type synack_type)
|
|
{
|
|
const struct inet_request_sock *ireq = inet_rsk(req);
|
|
struct flowi4 fl4;
|
|
int err = -1;
|
|
struct sk_buff *skb;
|
|
|
|
/* First, grab a route. */
|
|
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
|
|
return -1;
|
|
|
|
skb = tcp_make_synack(sk, dst, req, foc, synack_type);
|
|
|
|
if (skb) {
|
|
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
|
|
|
|
rcu_read_lock();
|
|
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
|
|
ireq->ir_rmt_addr,
|
|
rcu_dereference(ireq->ireq_opt));
|
|
rcu_read_unlock();
|
|
err = net_xmit_eval(err);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* IPv4 request_sock destructor.
|
|
*/
|
|
static void tcp_v4_reqsk_destructor(struct request_sock *req)
|
|
{
|
|
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
|
|
}
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
/*
|
|
* RFC2385 MD5 checksumming requires a mapping of
|
|
* IP address->MD5 Key.
|
|
* We need to maintain these in the sk structure.
|
|
*/
|
|
|
|
/* Find the Key structure for an address. */
|
|
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
|
|
const union tcp_md5_addr *addr,
|
|
int family)
|
|
{
|
|
const struct tcp_sock *tp = tcp_sk(sk);
|
|
struct tcp_md5sig_key *key;
|
|
const struct tcp_md5sig_info *md5sig;
|
|
__be32 mask;
|
|
struct tcp_md5sig_key *best_match = NULL;
|
|
bool match;
|
|
|
|
/* caller either holds rcu_read_lock() or socket lock */
|
|
md5sig = rcu_dereference_check(tp->md5sig_info,
|
|
lockdep_sock_is_held(sk));
|
|
if (!md5sig)
|
|
return NULL;
|
|
|
|
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
|
|
if (key->family != family)
|
|
continue;
|
|
|
|
if (family == AF_INET) {
|
|
mask = inet_make_mask(key->prefixlen);
|
|
match = (key->addr.a4.s_addr & mask) ==
|
|
(addr->a4.s_addr & mask);
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
} else if (family == AF_INET6) {
|
|
match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
|
|
key->prefixlen);
|
|
#endif
|
|
} else {
|
|
match = false;
|
|
}
|
|
|
|
if (match && (!best_match ||
|
|
key->prefixlen > best_match->prefixlen))
|
|
best_match = key;
|
|
}
|
|
return best_match;
|
|
}
|
|
EXPORT_SYMBOL(tcp_md5_do_lookup);
|
|
|
|
/* Exact lookup: return the key whose family, address bytes and prefix
 * length all equal the arguments, or NULL if there is none.
 */
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_md5sig_info *md5sig;
	unsigned int addr_len = sizeof(struct in_addr);
	struct tcp_md5sig_key *key;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		addr_len = sizeof(struct in6_addr);
#endif

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family == family &&
		    key->prefixlen == prefixlen &&
		    !memcmp(&key->addr, addr, addr_len))
			return key;
	}
	return NULL;
}
|
|
|
|
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
|
|
const struct sock *addr_sk)
|
|
{
|
|
const union tcp_md5_addr *addr;
|
|
|
|
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
|
|
return tcp_md5_do_lookup(sk, addr, AF_INET);
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_md5_lookup);
|
|
|
|
/* This can be called on a newly created socket, from other files */
|
|
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
|
|
int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
|
|
gfp_t gfp)
|
|
{
|
|
/* Add Key to the list */
|
|
struct tcp_md5sig_key *key;
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct tcp_md5sig_info *md5sig;
|
|
|
|
key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
|
|
if (key) {
|
|
/* Pre-existing entry - just update that one.
|
|
* Note that the key might be used concurrently.
|
|
*/
|
|
memcpy(key->key, newkey, newkeylen);
|
|
|
|
/* Pairs with READ_ONCE() in tcp_md5_hash_key().
|
|
* Also note that a reader could catch new key->keylen value
|
|
* but old key->key[], this is the reason we use __GFP_ZERO
|
|
* at sock_kmalloc() time below these lines.
|
|
*/
|
|
WRITE_ONCE(key->keylen, newkeylen);
|
|
|
|
return 0;
|
|
}
|
|
|
|
md5sig = rcu_dereference_protected(tp->md5sig_info,
|
|
lockdep_sock_is_held(sk));
|
|
if (!md5sig) {
|
|
md5sig = kmalloc(sizeof(*md5sig), gfp);
|
|
if (!md5sig)
|
|
return -ENOMEM;
|
|
|
|
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
|
|
INIT_HLIST_HEAD(&md5sig->head);
|
|
rcu_assign_pointer(tp->md5sig_info, md5sig);
|
|
}
|
|
|
|
key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
|
|
if (!key)
|
|
return -ENOMEM;
|
|
if (!tcp_alloc_md5sig_pool()) {
|
|
sock_kfree_s(sk, key, sizeof(*key));
|
|
return -ENOMEM;
|
|
}
|
|
|
|
memcpy(key->key, newkey, newkeylen);
|
|
key->keylen = newkeylen;
|
|
key->family = family;
|
|
key->prefixlen = prefixlen;
|
|
memcpy(&key->addr, addr,
|
|
(family == AF_INET6) ? sizeof(struct in6_addr) :
|
|
sizeof(struct in_addr));
|
|
hlist_add_head_rcu(&key->node, &md5sig->head);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(tcp_md5_do_add);
|
|
|
|
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
|
|
u8 prefixlen)
|
|
{
|
|
struct tcp_md5sig_key *key;
|
|
|
|
key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
|
|
if (!key)
|
|
return -ENOENT;
|
|
hlist_del_rcu(&key->node);
|
|
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
|
|
kfree_rcu(key, rcu);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(tcp_md5_do_del);
|
|
|
|
static void tcp_clear_md5_list(struct sock *sk)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
struct tcp_md5sig_key *key;
|
|
struct hlist_node *n;
|
|
struct tcp_md5sig_info *md5sig;
|
|
|
|
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
|
|
|
|
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
|
|
hlist_del_rcu(&key->node);
|
|
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
|
|
kfree_rcu(key, rcu);
|
|
}
|
|
}
|
|
|
|
static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
|
|
char __user *optval, int optlen)
|
|
{
|
|
struct tcp_md5sig cmd;
|
|
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
|
|
u8 prefixlen = 32;
|
|
|
|
if (optlen < sizeof(cmd))
|
|
return -EINVAL;
|
|
|
|
if (copy_from_user(&cmd, optval, sizeof(cmd)))
|
|
return -EFAULT;
|
|
|
|
if (sin->sin_family != AF_INET)
|
|
return -EINVAL;
|
|
|
|
if (optname == TCP_MD5SIG_EXT &&
|
|
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
|
|
prefixlen = cmd.tcpm_prefixlen;
|
|
if (prefixlen > 32)
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!cmd.tcpm_keylen)
|
|
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
|
|
AF_INET, prefixlen);
|
|
|
|
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
|
|
return -EINVAL;
|
|
|
|
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
|
|
AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
|
|
GFP_KERNEL);
|
|
}
|
|
|
|
/* Feed the IPv4 pseudo-header and a copy of the TCP header (with the
 * checksum field zeroed) into the hash request of @hp.  The data is staged
 * in hp->scratch.  Returns the result of crypto_ahash_update().
 */
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *pseudo = hp->scratch;
	/* The TCP header copy sits right behind the pseudo-header. */
	struct tcphdr *th_copy = (struct tcphdr *)(pseudo + 1);
	const unsigned int hashed_len = sizeof(*pseudo) + sizeof(*th);
	struct scatterlist sg;

	pseudo->saddr = saddr;
	pseudo->daddr = daddr;
	pseudo->pad = 0;
	pseudo->protocol = IPPROTO_TCP;
	pseudo->len = cpu_to_be16(nbytes);

	memcpy(th_copy, th, sizeof(*th));
	th_copy->check = 0;

	sg_init_one(&sg, pseudo, hashed_len);
	ahash_request_set_crypt(hp->md5_req, &sg, NULL, hashed_len);
	return crypto_ahash_update(hp->md5_req);
}
|
|
|
|
/* Compute the MD5 signature over pseudo-header, TCP header and key only
 * (no payload) into @md5_hash.  Returns 0 on success; on any failure the
 * 16 bytes at @md5_hash are zeroed and 1 is returned.
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
	struct ahash_request *req;

	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	/* Each step must succeed before the next one is attempted. */
	if (crypto_ahash_init(req) ||
	    tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2) ||
	    tcp_md5_hash_key(hp, key))
		goto clear_hash;

	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
|
|
|
|
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
|
|
const struct sock *sk,
|
|
const struct sk_buff *skb)
|
|
{
|
|
struct tcp_md5sig_pool *hp;
|
|
struct ahash_request *req;
|
|
const struct tcphdr *th = tcp_hdr(skb);
|
|
__be32 saddr, daddr;
|
|
|
|
if (sk) { /* valid for establish/request sockets */
|
|
saddr = sk->sk_rcv_saddr;
|
|
daddr = sk->sk_daddr;
|
|
} else {
|
|
const struct iphdr *iph = ip_hdr(skb);
|
|
saddr = iph->saddr;
|
|
daddr = iph->daddr;
|
|
}
|
|
|
|
hp = tcp_get_md5sig_pool();
|
|
if (!hp)
|
|
goto clear_hash_noput;
|
|
req = hp->md5_req;
|
|
|
|
if (crypto_ahash_init(req))
|
|
goto clear_hash;
|
|
|
|
if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
|
|
goto clear_hash;
|
|
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
|
|
goto clear_hash;
|
|
if (tcp_md5_hash_key(hp, key))
|
|
goto clear_hash;
|
|
ahash_request_set_crypt(req, NULL, md5_hash, 0);
|
|
if (crypto_ahash_final(req))
|
|
goto clear_hash;
|
|
|
|
tcp_put_md5sig_pool();
|
|
return 0;
|
|
|
|
clear_hash:
|
|
tcp_put_md5sig_pool();
|
|
clear_hash_noput:
|
|
memset(md5_hash, 0, 16);
|
|
return 1;
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
|
|
|
|
#endif
|
|
|
|
/* Called with rcu_read_lock() */
|
|
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
|
|
const struct sk_buff *skb)
|
|
{
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
/*
|
|
* This gets called for each TCP segment that arrives
|
|
* so we want to be efficient.
|
|
* We have 3 drop cases:
|
|
* o No MD5 hash and one expected.
|
|
* o MD5 hash and we're not expecting one.
|
|
* o MD5 hash and its wrong.
|
|
*/
|
|
const __u8 *hash_location = NULL;
|
|
struct tcp_md5sig_key *hash_expected;
|
|
const struct iphdr *iph = ip_hdr(skb);
|
|
const struct tcphdr *th = tcp_hdr(skb);
|
|
int genhash;
|
|
unsigned char newhash[16];
|
|
|
|
hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
|
|
AF_INET);
|
|
hash_location = tcp_parse_md5sig_option(th);
|
|
|
|
/* We've parsed the options - do we have a hash? */
|
|
if (!hash_expected && !hash_location)
|
|
return false;
|
|
|
|
if (hash_expected && !hash_location) {
|
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
|
|
return true;
|
|
}
|
|
|
|
if (!hash_expected && hash_location) {
|
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
|
|
return true;
|
|
}
|
|
|
|
/* Okay, so this is hash_expected and hash_location -
|
|
* so we need to calculate the checksum.
|
|
*/
|
|
genhash = tcp_v4_md5_hash_skb(newhash,
|
|
hash_expected,
|
|
NULL, skb);
|
|
|
|
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
|
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
|
|
net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
|
|
&iph->saddr, ntohs(th->source),
|
|
&iph->daddr, ntohs(th->dest),
|
|
genhash ? " tcp_v4_calc_md5_hash failed"
|
|
: "");
|
|
return true;
|
|
}
|
|
return false;
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
static void tcp_v4_init_req(struct request_sock *req,
|
|
const struct sock *sk_listener,
|
|
struct sk_buff *skb)
|
|
{
|
|
struct inet_request_sock *ireq = inet_rsk(req);
|
|
struct net *net = sock_net(sk_listener);
|
|
|
|
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
|
|
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
|
|
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
|
|
}
|
|
|
|
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
|
|
struct flowi *fl,
|
|
const struct request_sock *req)
|
|
{
|
|
return inet_csk_route_req(sk, &fl->u.ip4, req);
|
|
}
|
|
|
|
/* Generic request-socket operations for TCP over IPv4. */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		 = PF_INET,
	.obj_size	 = sizeof(struct tcp_request_sock),

	/* transmit side */
	.rtx_syn_ack	 = tcp_rtx_synack,
	.send_ack	 = tcp_v4_reqsk_send_ack,
	.send_reset	 = tcp_v4_send_reset,

	/* lifetime */
	.syn_ack_timeout = tcp_syn_ack_timeout,
	.destructor	 = tcp_v4_reqsk_destructor,
};
|
|
|
|
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
|
|
.mss_clamp = TCP_MSS_DEFAULT,
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
.req_md5_lookup = tcp_v4_md5_lookup,
|
|
.calc_md5_hash = tcp_v4_md5_hash_skb,
|
|
#endif
|
|
.init_req = tcp_v4_init_req,
|
|
#ifdef CONFIG_SYN_COOKIES
|
|
.cookie_init_seq = cookie_v4_init_sequence,
|
|
#endif
|
|
.route_req = tcp_v4_route_req,
|
|
.init_seq = tcp_v4_init_seq,
|
|
.init_ts_off = tcp_v4_init_ts_off,
|
|
.send_synack = tcp_v4_send_synack,
|
|
};
|
|
|
|
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
/* Never answer to SYNs send to broadcast or multicast */
|
|
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
|
|
goto drop;
|
|
|
|
return tcp_conn_request(&tcp_request_sock_ops,
|
|
&tcp_request_sock_ipv4_ops, sk, skb);
|
|
|
|
drop:
|
|
tcp_listendrop(sk);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_conn_request);
|
|
|
|
|
|
/*
|
|
* The three way handshake has completed - we got a valid synack -
|
|
* now create the new socket.
|
|
*/
|
|
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
|
|
struct request_sock *req,
|
|
struct dst_entry *dst,
|
|
struct request_sock *req_unhash,
|
|
bool *own_req)
|
|
{
|
|
struct inet_request_sock *ireq;
|
|
struct inet_sock *newinet;
|
|
struct tcp_sock *newtp;
|
|
struct sock *newsk;
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
struct tcp_md5sig_key *key;
|
|
#endif
|
|
struct ip_options_rcu *inet_opt;
|
|
|
|
if (sk_acceptq_is_full(sk))
|
|
goto exit_overflow;
|
|
|
|
newsk = tcp_create_openreq_child(sk, req, skb);
|
|
if (!newsk)
|
|
goto exit_nonewsk;
|
|
|
|
newsk->sk_gso_type = SKB_GSO_TCPV4;
|
|
inet_sk_rx_dst_set(newsk, skb);
|
|
|
|
newtp = tcp_sk(newsk);
|
|
newinet = inet_sk(newsk);
|
|
ireq = inet_rsk(req);
|
|
sk_daddr_set(newsk, ireq->ir_rmt_addr);
|
|
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
|
|
newsk->sk_bound_dev_if = ireq->ir_iif;
|
|
newinet->inet_saddr = ireq->ir_loc_addr;
|
|
inet_opt = rcu_dereference(ireq->ireq_opt);
|
|
RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
|
|
newinet->mc_index = inet_iif(skb);
|
|
newinet->mc_ttl = ip_hdr(skb)->ttl;
|
|
newinet->rcv_tos = ip_hdr(skb)->tos;
|
|
inet_csk(newsk)->icsk_ext_hdr_len = 0;
|
|
if (inet_opt)
|
|
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
|
|
newinet->inet_id = prandom_u32();
|
|
|
|
if (!dst) {
|
|
dst = inet_csk_route_child_sock(sk, newsk, req);
|
|
if (!dst)
|
|
goto put_and_exit;
|
|
} else {
|
|
/* syncookie case : see end of cookie_v4_check() */
|
|
}
|
|
sk_setup_caps(newsk, dst);
|
|
|
|
tcp_ca_openreq_child(newsk, dst);
|
|
|
|
tcp_sync_mss(newsk, dst_mtu(dst));
|
|
newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
|
|
|
|
tcp_initialize_rcv_mss(newsk);
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
/* Copy over the MD5 key from the original socket */
|
|
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
|
|
AF_INET);
|
|
if (key) {
|
|
/*
|
|
* We're using one, so create a matching key
|
|
* on the newsk structure. If we fail to get
|
|
* memory, then we end up not copying the key
|
|
* across. Shucks.
|
|
*/
|
|
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
|
|
AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
|
|
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
|
|
}
|
|
#endif
|
|
|
|
if (__inet_inherit_port(sk, newsk) < 0)
|
|
goto put_and_exit;
|
|
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
|
|
if (likely(*own_req)) {
|
|
tcp_move_syn(newtp, req);
|
|
ireq->ireq_opt = NULL;
|
|
} else {
|
|
newinet->inet_opt = NULL;
|
|
}
|
|
return newsk;
|
|
|
|
exit_overflow:
|
|
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
|
|
exit_nonewsk:
|
|
dst_release(dst);
|
|
exit:
|
|
tcp_listendrop(sk);
|
|
return NULL;
|
|
put_and_exit:
|
|
newinet->inet_opt = NULL;
|
|
inet_csk_prepare_forced_close(newsk);
|
|
tcp_done(newsk);
|
|
goto exit;
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
|
|
|
|
/* With CONFIG_SYN_COOKIES, pass non-SYN segments through cookie_v4_check()
 * and return its result; otherwise (or for SYN segments) return @sk as is.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}
|
|
|
|
/* The socket must have it's spinlock held when we get
|
|
* here, unless it is a TCP_LISTEN socket.
|
|
*
|
|
* We have a potential double-lock case here, so even when
|
|
* doing backlog processing we use the BH locking scheme.
|
|
* This is because we cannot sleep with the original spinlock
|
|
* held.
|
|
*/
|
|
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct sock *rsk;
|
|
|
|
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
|
|
struct dst_entry *dst = sk->sk_rx_dst;
|
|
|
|
sock_rps_save_rxhash(sk, skb);
|
|
sk_mark_napi_id(sk, skb);
|
|
if (dst) {
|
|
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
|
|
!dst->ops->check(dst, 0)) {
|
|
dst_release(dst);
|
|
sk->sk_rx_dst = NULL;
|
|
}
|
|
}
|
|
tcp_rcv_established(sk, skb);
|
|
return 0;
|
|
}
|
|
|
|
if (tcp_checksum_complete(skb))
|
|
goto csum_err;
|
|
|
|
if (sk->sk_state == TCP_LISTEN) {
|
|
struct sock *nsk = tcp_v4_cookie_check(sk, skb);
|
|
|
|
if (!nsk)
|
|
goto discard;
|
|
if (nsk != sk) {
|
|
if (tcp_child_process(sk, nsk, skb)) {
|
|
rsk = nsk;
|
|
goto reset;
|
|
}
|
|
return 0;
|
|
}
|
|
} else
|
|
sock_rps_save_rxhash(sk, skb);
|
|
|
|
if (tcp_rcv_state_process(sk, skb)) {
|
|
rsk = sk;
|
|
goto reset;
|
|
}
|
|
return 0;
|
|
|
|
reset:
|
|
tcp_v4_send_reset(rsk, skb);
|
|
discard:
|
|
kfree_skb(skb);
|
|
/* Be careful here. If this function gets more complicated and
|
|
* gcc suffers from register pressure on the x86, sk (in %ebx)
|
|
* might be destroyed here. This current version compiles correctly,
|
|
* but you have been warned.
|
|
*/
|
|
return 0;
|
|
|
|
csum_err:
|
|
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
|
|
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
|
|
goto discard;
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_do_rcv);
|
|
|
|
int tcp_v4_early_demux(struct sk_buff *skb)
|
|
{
|
|
const struct iphdr *iph;
|
|
const struct tcphdr *th;
|
|
struct sock *sk;
|
|
|
|
if (skb->pkt_type != PACKET_HOST)
|
|
return 0;
|
|
|
|
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
|
|
return 0;
|
|
|
|
iph = ip_hdr(skb);
|
|
th = tcp_hdr(skb);
|
|
|
|
if (th->doff < sizeof(struct tcphdr) / 4)
|
|
return 0;
|
|
|
|
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
|
|
iph->saddr, th->source,
|
|
iph->daddr, ntohs(th->dest),
|
|
skb->skb_iif, inet_sdif(skb));
|
|
if (sk) {
|
|
skb->sk = sk;
|
|
skb->destructor = sock_edemux;
|
|
if (sk_fullsock(sk)) {
|
|
struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
|
|
|
|
if (dst)
|
|
dst = dst_check(dst, 0);
|
|
if (dst &&
|
|
inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
|
|
skb_dst_set_noref(skb, dst);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
|
|
|
|
/* Only socket owner can try to collapse/prune rx queues
|
|
* to reduce memory overhead, so add a little headroom here.
|
|
* Few sockets backlog are possibly concurrently non empty.
|
|
*/
|
|
limit += 64*1024;
|
|
|
|
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
|
|
* we can fix skb->truesize to its real value to avoid future drops.
|
|
* This is valid because skb is not yet charged to the socket.
|
|
* It has been noticed pure SACK packets were sometimes dropped
|
|
* (if cooked by drivers without copybreak feature).
|
|
*/
|
|
skb_condense(skb);
|
|
|
|
if (unlikely(sk_add_backlog(sk, skb, limit))) {
|
|
bh_unlock_sock(sk);
|
|
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(tcp_add_backlog);
|
|
|
|
int tcp_filter(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct tcphdr *th = (struct tcphdr *)skb->data;
|
|
|
|
return sk_filter_trim_cap(sk, skb, th->doff * 4);
|
|
}
|
|
EXPORT_SYMBOL(tcp_filter);
|
|
|
|
static void tcp_v4_restore_cb(struct sk_buff *skb)
|
|
{
|
|
memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
|
|
sizeof(struct inet_skb_parm));
|
|
}
|
|
|
|
static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
|
|
const struct tcphdr *th)
|
|
{
|
|
/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
|
|
* barrier() makes sure compiler wont play fool^Waliasing games.
|
|
*/
|
|
memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
|
|
sizeof(struct inet_skb_parm));
|
|
barrier();
|
|
|
|
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
|
|
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
|
skb->len - th->doff * 4);
|
|
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
|
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
|
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
|
|
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
|
|
TCP_SKB_CB(skb)->sacked = 0;
|
|
TCP_SKB_CB(skb)->has_rxtstamp =
|
|
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
|
|
}
|
|
|
|
/*
|
|
* From tcp_input.c
|
|
*/
|
|
|
|
int tcp_v4_rcv(struct sk_buff *skb)
|
|
{
|
|
struct net *net = dev_net(skb->dev);
|
|
int sdif = inet_sdif(skb);
|
|
const struct iphdr *iph;
|
|
const struct tcphdr *th;
|
|
bool refcounted;
|
|
struct sock *sk;
|
|
int ret;
|
|
|
|
if (skb->pkt_type != PACKET_HOST)
|
|
goto discard_it;
|
|
|
|
/* Count it even if it's bad */
|
|
__TCP_INC_STATS(net, TCP_MIB_INSEGS);
|
|
|
|
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
|
|
goto discard_it;
|
|
|
|
th = (const struct tcphdr *)skb->data;
|
|
|
|
if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
|
|
goto bad_packet;
|
|
if (!pskb_may_pull(skb, th->doff * 4))
|
|
goto discard_it;
|
|
|
|
/* An explanation is required here, I think.
|
|
* Packet length and doff are validated by header prediction,
|
|
* provided case of th->doff==0 is eliminated.
|
|
* So, we defer the checks. */
|
|
|
|
if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
|
|
goto csum_error;
|
|
|
|
th = (const struct tcphdr *)skb->data;
|
|
iph = ip_hdr(skb);
|
|
lookup:
|
|
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
|
|
th->dest, sdif, &refcounted);
|
|
if (!sk)
|
|
goto no_tcp_socket;
|
|
|
|
process:
|
|
if (sk->sk_state == TCP_TIME_WAIT)
|
|
goto do_time_wait;
|
|
|
|
if (sk->sk_state == TCP_NEW_SYN_RECV) {
|
|
struct request_sock *req = inet_reqsk(sk);
|
|
bool req_stolen = false;
|
|
struct sock *nsk;
|
|
|
|
sk = req->rsk_listener;
|
|
if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
|
|
sk_drops_add(sk, skb);
|
|
reqsk_put(req);
|
|
goto discard_it;
|
|
}
|
|
if (tcp_checksum_complete(skb)) {
|
|
reqsk_put(req);
|
|
goto csum_error;
|
|
}
|
|
if (unlikely(sk->sk_state != TCP_LISTEN)) {
|
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
|
goto lookup;
|
|
}
|
|
/* We own a reference on the listener, increase it again
|
|
* as we might lose it too soon.
|
|
*/
|
|
sock_hold(sk);
|
|
refcounted = true;
|
|
nsk = NULL;
|
|
if (!tcp_filter(sk, skb)) {
|
|
th = (const struct tcphdr *)skb->data;
|
|
iph = ip_hdr(skb);
|
|
tcp_v4_fill_cb(skb, iph, th);
|
|
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
|
|
}
|
|
if (!nsk) {
|
|
reqsk_put(req);
|
|
if (req_stolen) {
|
|
/* Another cpu got exclusive access to req
|
|
* and created a full blown socket.
|
|
* Try to feed this packet to this socket
|
|
* instead of discarding it.
|
|
*/
|
|
tcp_v4_restore_cb(skb);
|
|
sock_put(sk);
|
|
goto lookup;
|
|
}
|
|
goto discard_and_relse;
|
|
}
|
|
if (nsk == sk) {
|
|
reqsk_put(req);
|
|
tcp_v4_restore_cb(skb);
|
|
} else if (tcp_child_process(sk, nsk, skb)) {
|
|
tcp_v4_send_reset(nsk, skb);
|
|
goto discard_and_relse;
|
|
} else {
|
|
sock_put(sk);
|
|
return 0;
|
|
}
|
|
}
|
|
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
|
|
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
|
|
goto discard_and_relse;
|
|
}
|
|
|
|
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
|
|
goto discard_and_relse;
|
|
|
|
if (tcp_v4_inbound_md5_hash(sk, skb))
|
|
goto discard_and_relse;
|
|
|
|
nf_reset(skb);
|
|
|
|
if (tcp_filter(sk, skb))
|
|
goto discard_and_relse;
|
|
th = (const struct tcphdr *)skb->data;
|
|
iph = ip_hdr(skb);
|
|
tcp_v4_fill_cb(skb, iph, th);
|
|
|
|
skb->dev = NULL;
|
|
|
|
if (sk->sk_state == TCP_LISTEN) {
|
|
ret = tcp_v4_do_rcv(sk, skb);
|
|
goto put_and_return;
|
|
}
|
|
|
|
sk_incoming_cpu_update(sk);
|
|
|
|
bh_lock_sock_nested(sk);
|
|
tcp_segs_in(tcp_sk(sk), skb);
|
|
ret = 0;
|
|
if (!sock_owned_by_user(sk)) {
|
|
ret = tcp_v4_do_rcv(sk, skb);
|
|
} else if (tcp_add_backlog(sk, skb)) {
|
|
goto discard_and_relse;
|
|
}
|
|
bh_unlock_sock(sk);
|
|
|
|
put_and_return:
|
|
if (refcounted)
|
|
sock_put(sk);
|
|
|
|
return ret;
|
|
|
|
no_tcp_socket:
|
|
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
|
|
goto discard_it;
|
|
|
|
tcp_v4_fill_cb(skb, iph, th);
|
|
|
|
if (tcp_checksum_complete(skb)) {
|
|
csum_error:
|
|
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
|
|
bad_packet:
|
|
__TCP_INC_STATS(net, TCP_MIB_INERRS);
|
|
} else {
|
|
tcp_v4_send_reset(NULL, skb);
|
|
}
|
|
|
|
discard_it:
|
|
/* Discard frame. */
|
|
kfree_skb(skb);
|
|
return 0;
|
|
|
|
discard_and_relse:
|
|
sk_drops_add(sk, skb);
|
|
if (refcounted)
|
|
sock_put(sk);
|
|
goto discard_it;
|
|
|
|
do_time_wait:
|
|
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
|
|
inet_twsk_put(inet_twsk(sk));
|
|
goto discard_it;
|
|
}
|
|
|
|
tcp_v4_fill_cb(skb, iph, th);
|
|
|
|
if (tcp_checksum_complete(skb)) {
|
|
inet_twsk_put(inet_twsk(sk));
|
|
goto csum_error;
|
|
}
|
|
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
|
|
case TCP_TW_SYN: {
|
|
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
|
|
&tcp_hashinfo, skb,
|
|
__tcp_hdrlen(th),
|
|
iph->saddr, th->source,
|
|
iph->daddr, th->dest,
|
|
inet_iif(skb),
|
|
sdif);
|
|
if (sk2) {
|
|
inet_twsk_deschedule_put(inet_twsk(sk));
|
|
sk = sk2;
|
|
tcp_v4_restore_cb(skb);
|
|
refcounted = false;
|
|
goto process;
|
|
}
|
|
}
|
|
/* to ACK */
|
|
/* fall through */
|
|
case TCP_TW_ACK:
|
|
tcp_v4_timewait_ack(sk, skb);
|
|
break;
|
|
case TCP_TW_RST:
|
|
tcp_v4_send_reset(sk, skb);
|
|
inet_twsk_deschedule_put(inet_twsk(sk));
|
|
goto discard_it;
|
|
case TCP_TW_SUCCESS:;
|
|
}
|
|
goto discard_it;
|
|
}
|
|
|
|
static struct timewait_sock_ops tcp_timewait_sock_ops = {
|
|
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
|
|
.twsk_unique = tcp_twsk_unique,
|
|
.twsk_destructor= tcp_twsk_destructor,
|
|
};
|
|
|
|
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
|
|
{
|
|
struct dst_entry *dst = skb_dst(skb);
|
|
|
|
if (dst && dst_hold_safe(dst)) {
|
|
sk->sk_rx_dst = dst;
|
|
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(inet_sk_rx_dst_set);
|
|
|
|
const struct inet_connection_sock_af_ops ipv4_specific = {
|
|
.queue_xmit = ip_queue_xmit,
|
|
.send_check = tcp_v4_send_check,
|
|
.rebuild_header = inet_sk_rebuild_header,
|
|
.sk_rx_dst_set = inet_sk_rx_dst_set,
|
|
.conn_request = tcp_v4_conn_request,
|
|
.syn_recv_sock = tcp_v4_syn_recv_sock,
|
|
.net_header_len = sizeof(struct iphdr),
|
|
.setsockopt = ip_setsockopt,
|
|
.getsockopt = ip_getsockopt,
|
|
.addr2sockaddr = inet_csk_addr2sockaddr,
|
|
.sockaddr_len = sizeof(struct sockaddr_in),
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_setsockopt = compat_ip_setsockopt,
|
|
.compat_getsockopt = compat_ip_getsockopt,
|
|
#endif
|
|
.mtu_reduced = tcp_v4_mtu_reduced,
|
|
};
|
|
EXPORT_SYMBOL(ipv4_specific);
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
|
|
.md5_lookup = tcp_v4_md5_lookup,
|
|
.calc_md5_hash = tcp_v4_md5_hash_skb,
|
|
.md5_parse = tcp_v4_parse_md5_keys,
|
|
};
|
|
#endif
|
|
|
|
/* NOTE: A lot of things set to zero explicitly by call to
|
|
* sk_alloc() so need not be done here.
|
|
*/
|
|
static int tcp_v4_init_sock(struct sock *sk)
|
|
{
|
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
|
|
|
tcp_init_sock(sk);
|
|
|
|
icsk->icsk_af_ops = &ipv4_specific;
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
void tcp_v4_destroy_sock(struct sock *sk)
|
|
{
|
|
struct tcp_sock *tp = tcp_sk(sk);
|
|
|
|
trace_tcp_destroy_sock(sk);
|
|
|
|
tcp_clear_xmit_timers(sk);
|
|
|
|
tcp_cleanup_congestion_control(sk);
|
|
|
|
tcp_cleanup_ulp(sk);
|
|
|
|
/* Cleanup up the write buffer. */
|
|
tcp_write_queue_purge(sk);
|
|
|
|
/* Check if we want to disable active TFO */
|
|
tcp_fastopen_active_disable_ofo_check(sk);
|
|
|
|
/* Cleans up our, hopefully empty, out_of_order_queue. */
|
|
skb_rbtree_purge(&tp->out_of_order_queue);
|
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
/* Clean up the MD5 key list, if any */
|
|
if (tp->md5sig_info) {
|
|
tcp_clear_md5_list(sk);
|
|
kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
|
|
tp->md5sig_info = NULL;
|
|
}
|
|
#endif
|
|
|
|
/* Clean up a referenced TCP bind bucket. */
|
|
if (inet_csk(sk)->icsk_bind_hash)
|
|
inet_put_port(sk);
|
|
|
|
BUG_ON(tp->fastopen_rsk);
|
|
|
|
/* If socket is aborted during connect operation */
|
|
tcp_free_fastopen_req(tp);
|
|
tcp_fastopen_destroy_cipher(sk);
|
|
tcp_saved_syn_free(tp);
|
|
|
|
sk_sockets_allocated_dec(sk);
|
|
}
|
|
EXPORT_SYMBOL(tcp_v4_destroy_sock);
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
/* Proc filesystem TCP sock list dumping. */
|
|
|
|
/*
|
|
* Get next listener socket follow cur. If cur is NULL, get first socket
|
|
* starting from bucket given in st->bucket; when st->bucket is zero the
|
|
* very first socket in the hash table is returned.
|
|
*/
|
|
static void *listening_get_next(struct seq_file *seq, void *cur)
|
|
{
|
|
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
|
|
struct tcp_iter_state *st = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
struct inet_listen_hashbucket *ilb;
|
|
struct hlist_nulls_node *node;
|
|
struct sock *sk = cur;
|
|
|
|
if (!sk) {
|
|
get_head:
|
|
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
|
spin_lock(&ilb->lock);
|
|
sk = sk_nulls_head(&ilb->nulls_head);
|
|
st->offset = 0;
|
|
goto get_sk;
|
|
}
|
|
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
|
++st->num;
|
|
++st->offset;
|
|
|
|
sk = sk_nulls_next(sk);
|
|
get_sk:
|
|
sk_nulls_for_each_from(sk, node) {
|
|
if (!net_eq(sock_net(sk), net))
|
|
continue;
|
|
if (sk->sk_family == afinfo->family)
|
|
return sk;
|
|
}
|
|
spin_unlock(&ilb->lock);
|
|
st->offset = 0;
|
|
if (++st->bucket < INET_LHTABLE_SIZE)
|
|
goto get_head;
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Position the listening walk on the (*pos)-th matching socket, counting
 * from the very first bucket.  *pos is decremented once per socket that is
 * stepped over, so on a NULL return it holds the part of the index that
 * was not consumed by the listening table.
 */
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *sk;

	st->bucket = 0;
	st->offset = 0;

	for (sk = listening_get_next(seq, NULL); sk && *pos; --*pos)
		sk = listening_get_next(seq, sk);

	return sk;
}
|
|
|
|
static inline bool empty_bucket(const struct tcp_iter_state *st)
|
|
{
|
|
return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
|
|
}
|
|
|
|
/*
|
|
* Get first established socket starting from bucket given in st->bucket.
|
|
* If st->bucket is zero, the very first socket in the hash is returned.
|
|
*/
|
|
static void *established_get_first(struct seq_file *seq)
|
|
{
|
|
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
|
|
struct tcp_iter_state *st = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
void *rc = NULL;
|
|
|
|
st->offset = 0;
|
|
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
|
|
struct sock *sk;
|
|
struct hlist_nulls_node *node;
|
|
spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
|
|
|
|
/* Lockless fast path for the common case of empty buckets */
|
|
if (empty_bucket(st))
|
|
continue;
|
|
|
|
spin_lock_bh(lock);
|
|
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
|
|
if (sk->sk_family != afinfo->family ||
|
|
!net_eq(sock_net(sk), net)) {
|
|
continue;
|
|
}
|
|
rc = sk;
|
|
goto out;
|
|
}
|
|
spin_unlock_bh(lock);
|
|
}
|
|
out:
|
|
return rc;
|
|
}
|
|
|
|
static void *established_get_next(struct seq_file *seq, void *cur)
|
|
{
|
|
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
|
|
struct sock *sk = cur;
|
|
struct hlist_nulls_node *node;
|
|
struct tcp_iter_state *st = seq->private;
|
|
struct net *net = seq_file_net(seq);
|
|
|
|
++st->num;
|
|
++st->offset;
|
|
|
|
sk = sk_nulls_next(sk);
|
|
|
|
sk_nulls_for_each_from(sk, node) {
|
|
if (sk->sk_family == afinfo->family &&
|
|
net_eq(sock_net(sk), net))
|
|
return sk;
|
|
}
|
|
|
|
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
|
|
++st->bucket;
|
|
return established_get_first(seq);
|
|
}
|
|
|
|
/*
 * Return the @pos-th matching socket of the established hash, counted
 * from bucket 0, or NULL if the table holds fewer matches than that.
 */
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *sk;

	st->bucket = 0;

	for (sk = established_get_first(seq); sk && pos; --pos)
		sk = established_get_next(seq, sk);

	return sk;
}
|
|
|
|
/*
 * Locate the @pos-th socket of the combined dump: listening sockets come
 * first, then established ones.  st->state is left describing the table
 * in which the search ended.
 */
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *sk;

	st->state = TCP_SEQ_STATE_LISTENING;
	sk = listening_get_idx(seq, &pos);
	if (sk)
		return sk;

	/* pos now holds what the listening table did not consume. */
	st->state = TCP_SEQ_STATE_ESTABLISHED;
	return established_get_idx(seq, pos);
}
|
|
|
|
static void *tcp_seek_last_pos(struct seq_file *seq)
|
|
{
|
|
struct tcp_iter_state *st = seq->private;
|
|
int offset = st->offset;
|
|
int orig_num = st->num;
|
|
void *rc = NULL;
|
|
|
|
switch (st->state) {
|
|
case TCP_SEQ_STATE_LISTENING:
|
|
if (st->bucket >= INET_LHTABLE_SIZE)
|
|
break;
|
|
st->state = TCP_SEQ_STATE_LISTENING;
|
|
rc = listening_get_next(seq, NULL);
|
|
while (offset-- && rc)
|
|
rc = listening_get_next(seq, rc);
|
|
if (rc)
|
|
break;
|
|
st->bucket = 0;
|
|
st->state = TCP_SEQ_STATE_ESTABLISHED;
|
|
/* Fallthrough */
|
|
case TCP_SEQ_STATE_ESTABLISHED:
|
|
if (st->bucket > tcp_hashinfo.ehash_mask)
|
|
break;
|
|
rc = established_get_first(seq);
|
|
while (offset-- && rc)
|
|
rc = established_get_next(seq, rc);
|
|
}
|
|
|
|
st->num = orig_num;
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
 * seq_file ->start: position the iterator at *pos.
 *
 * When *pos is non-zero and equals the position recorded by the previous
 * call (st->last_pos), first try to resume from the saved iterator state.
 * Otherwise, or if that yields nothing, reset the iterator and either walk
 * to entry *pos - 1 or, for *pos == 0, return SEQ_START_TOKEN.
 * *pos is always recorded in st->last_pos before returning.
 */
void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (*pos && *pos == st->last_pos)
		rc = tcp_seek_last_pos(seq);

	if (!rc) {
		st->state = TCP_SEQ_STATE_LISTENING;
		st->num = 0;
		st->bucket = 0;
		st->offset = 0;

		if (*pos)
			rc = tcp_get_idx(seq, *pos - 1);
		else
			rc = SEQ_START_TOKEN;
	}

	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_start);
|
|
|
|
/*
 * seq_file ->next: return the entry following @v.
 *
 * After the header token the first real socket is fetched.  While in the
 * listening table, running off its end switches the iterator to the
 * established table starting at bucket 0.  *pos is incremented and
 * mirrored into st->last_pos on every call.
 */
void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
	} else if (st->state == TCP_SEQ_STATE_LISTENING) {
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
	} else if (st->state == TCP_SEQ_STATE_ESTABLISHED) {
		rc = established_get_next(seq, v);
	}

	++*pos;
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_next);
|
|
|
|
void tcp_seq_stop(struct seq_file *seq, void *v)
|
|
{
|
|
struct tcp_iter_state *st = seq->private;
|
|
|
|
switch (st->state) {
|
|
case TCP_SEQ_STATE_LISTENING:
|
|
if (v != SEQ_START_TOKEN)
|
|
spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
|
|
break;
|
|
case TCP_SEQ_STATE_ESTABLISHED:
|
|
if (v)
|
|
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
|
|
break;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(tcp_seq_stop);
|
|
|
|
static void get_openreq4(const struct request_sock *req,
|
|
struct seq_file *f, int i)
|
|
{
|
|
const struct inet_request_sock *ireq = inet_rsk(req);
|
|
long delta = req->rsk_timer.expires - jiffies;
|
|
|
|
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
|
|
" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
|
|
i,
|
|
ireq->ir_loc_addr,
|
|
ireq->ir_num,
|
|
ireq->ir_rmt_addr,
|
|
ntohs(ireq->ir_rmt_port),
|
|
TCP_SYN_RECV,
|
|
0, 0, /* could print option size, but that is af dependent. */
|
|
1, /* timers active (only the expire timer) */
|
|
jiffies_delta_to_clock_t(delta),
|
|
req->num_timeout,
|
|
from_kuid_munged(seq_user_ns(f),
|
|
sock_i_uid(req->rsk_listener)),
|
|
0, /* non standard timer */
|
|
0, /* open_requests have no inode */
|
|
0,
|
|
req);
|
|
}
|
|
|
|
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
|
|
{
|
|
int timer_active;
|
|
unsigned long timer_expires;
|
|
const struct tcp_sock *tp = tcp_sk(sk);
|
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
|
|
__be32 dest = inet->inet_daddr;
|
|
__be32 src = inet->inet_rcv_saddr;
|
|
__u16 destp = ntohs(inet->inet_dport);
|
|
__u16 srcp = ntohs(inet->inet_sport);
|
|
__u8 seq_state = sk->sk_state;
|
|
int rx_queue;
|
|
int state;
|
|
|
|
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
|
|
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
|
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
|
timer_active = 1;
|
|
timer_expires = icsk->icsk_timeout;
|
|
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
|
|
timer_active = 4;
|
|
timer_expires = icsk->icsk_timeout;
|
|
} else if (timer_pending(&sk->sk_timer)) {
|
|
timer_active = 2;
|
|
timer_expires = sk->sk_timer.expires;
|
|
} else {
|
|
timer_active = 0;
|
|
timer_expires = jiffies;
|
|
}
|
|
|
|
if (inet->transparent)
|
|
seq_state |= 0x80;
|
|
|
|
state = inet_sk_state_load(sk);
|
|
if (state == TCP_LISTEN)
|
|
rx_queue = sk->sk_ack_backlog;
|
|
else
|
|
/* Because we don't lock the socket,
|
|
* we might find a transient negative value.
|
|
*/
|
|
rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
|
|
tp->copied_seq, 0);
|
|
|
|
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
|
|
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
|
|
i, src, srcp, dest, destp, seq_state,
|
|
tp->write_seq - tp->snd_una,
|
|
rx_queue,
|
|
timer_active,
|
|
jiffies_delta_to_clock_t(timer_expires - jiffies),
|
|
icsk->icsk_retransmits,
|
|
from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
|
|
icsk->icsk_probes_out,
|
|
sock_i_ino(sk),
|
|
refcount_read(&sk->sk_refcnt), sk,
|
|
jiffies_to_clock_t(icsk->icsk_rto),
|
|
jiffies_to_clock_t(icsk->icsk_ack.ato),
|
|
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
|
|
tp->snd_cwnd,
|
|
state == TCP_LISTEN ?
|
|
fastopenq->max_qlen :
|
|
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
|
|
}
|
|
|
|
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
|
|
struct seq_file *f, int i)
|
|
{
|
|
long delta = tw->tw_timer.expires - jiffies;
|
|
__be32 dest, src;
|
|
__u16 destp, srcp;
|
|
|
|
dest = tw->tw_daddr;
|
|
src = tw->tw_rcv_saddr;
|
|
destp = ntohs(tw->tw_dport);
|
|
srcp = ntohs(tw->tw_sport);
|
|
|
|
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
|
|
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
|
|
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
|
|
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
|
|
refcount_read(&tw->tw_refcnt), tw);
|
|
}
|
|
|
|
#define TMPSZ 150
|
|
|
|
static int tcp4_seq_show(struct seq_file *seq, void *v)
|
|
{
|
|
struct tcp_iter_state *st;
|
|
struct sock *sk = v;
|
|
|
|
seq_setwidth(seq, TMPSZ - 1);
|
|
if (v == SEQ_START_TOKEN) {
|
|
seq_puts(seq, " sl local_address rem_address st tx_queue "
|
|
"rx_queue tr tm->when retrnsmt uid timeout "
|
|
"inode");
|
|
goto out;
|
|
}
|
|
st = seq->private;
|
|
|
|
if (sk->sk_state == TCP_TIME_WAIT)
|
|
get_timewait4_sock(v, seq, st->num);
|
|
else if (sk->sk_state == TCP_NEW_SYN_RECV)
|
|
get_openreq4(v, seq, st->num);
|
|
else
|
|
get_tcp4_sock(v, seq, st->num);
|
|
out:
|
|
seq_pad(seq, '\n');
|
|
return 0;
|
|
}
|
|
|
|
static const struct seq_operations tcp4_seq_ops = {
|
|
.show = tcp4_seq_show,
|
|
.start = tcp_seq_start,
|
|
.next = tcp_seq_next,
|
|
.stop = tcp_seq_stop,
|
|
};
|
|
|
|
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
|
|
.family = AF_INET,
|
|
};
|
|
|
|
/*
 * Per-netns init: create the read-only "tcp" entry under net->proc_net.
 * Returns 0 on success, -ENOMEM if the entry could not be created.
 */
static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
				 sizeof(struct tcp_iter_state),
				 &tcp4_seq_afinfo))
		return 0;

	return -ENOMEM;
}
|
|
|
|
/* Per-netns exit: remove the "tcp" entry from net->proc_net. */
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}
|
|
|
|
static struct pernet_operations tcp4_net_ops = {
|
|
.init = tcp4_proc_init_net,
|
|
.exit = tcp4_proc_exit_net,
|
|
};
|
|
|
|
/*
 * Register the proc hooks for every network namespace.
 * Returns the result of register_pernet_subsys().
 */
int __init tcp4_proc_init(void)
{
	int err = register_pernet_subsys(&tcp4_net_ops);

	return err;
}
|
|
|
|
void tcp4_proc_exit(void)
|
|
{
|
|
unregister_pernet_subsys(&tcp4_net_ops);
|
|
}
|
|
#endif /* CONFIG_PROC_FS */
|
|
|
|
struct proto tcp_prot = {
|
|
.name = "TCP",
|
|
.owner = THIS_MODULE,
|
|
.close = tcp_close,
|
|
.pre_connect = tcp_v4_pre_connect,
|
|
.connect = tcp_v4_connect,
|
|
.disconnect = tcp_disconnect,
|
|
.accept = inet_csk_accept,
|
|
.ioctl = tcp_ioctl,
|
|
.init = tcp_v4_init_sock,
|
|
.destroy = tcp_v4_destroy_sock,
|
|
.shutdown = tcp_shutdown,
|
|
.setsockopt = tcp_setsockopt,
|
|
.getsockopt = tcp_getsockopt,
|
|
.keepalive = tcp_set_keepalive,
|
|
.recvmsg = tcp_recvmsg,
|
|
.sendmsg = tcp_sendmsg,
|
|
.sendpage = tcp_sendpage,
|
|
.backlog_rcv = tcp_v4_do_rcv,
|
|
.release_cb = tcp_release_cb,
|
|
.hash = inet_hash,
|
|
.unhash = inet_unhash,
|
|
.get_port = inet_csk_get_port,
|
|
.enter_memory_pressure = tcp_enter_memory_pressure,
|
|
.leave_memory_pressure = tcp_leave_memory_pressure,
|
|
.stream_memory_free = tcp_stream_memory_free,
|
|
.sockets_allocated = &tcp_sockets_allocated,
|
|
.orphan_count = &tcp_orphan_count,
|
|
.memory_allocated = &tcp_memory_allocated,
|
|
.memory_pressure = &tcp_memory_pressure,
|
|
.sysctl_mem = sysctl_tcp_mem,
|
|
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
|
|
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
|
|
.max_header = MAX_TCP_HEADER,
|
|
.obj_size = sizeof(struct tcp_sock),
|
|
.slab_flags = SLAB_TYPESAFE_BY_RCU,
|
|
.twsk_prot = &tcp_timewait_sock_ops,
|
|
.rsk_prot = &tcp_request_sock_ops,
|
|
.h.hashinfo = &tcp_hashinfo,
|
|
.no_autobind = true,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_setsockopt = compat_tcp_setsockopt,
|
|
.compat_getsockopt = compat_tcp_getsockopt,
|
|
#endif
|
|
.diag_destroy = tcp_abort,
|
|
};
|
|
EXPORT_SYMBOL(tcp_prot);
|
|
|
|
/*
 * Per-netns teardown: drop the module reference of the namespace's
 * congestion control (if one is set), destroy the control socket stored
 * for every possible CPU and free the per-cpu pointer array.
 * Also used by tcp_sk_init() to unwind a partially completed init.
 */
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	if (net->ipv4.tcp_congestion_control)
		module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu) {
		struct sock *ctl_sk = *per_cpu_ptr(net->ipv4.tcp_sk, cpu);

		inet_ctl_sock_destroy(ctl_sk);
	}

	free_percpu(net->ipv4.tcp_sk);
}
|
|
|
|
/*
 * Per-netns setup: create one raw IPPROTO_TCP control socket per possible
 * CPU, load the default values of the per-namespace TCP sysctls and pick
 * the namespace's congestion control.
 *
 * Returns 0 on success, -ENOMEM if the per-cpu array cannot be allocated,
 * or the error from inet_ctl_sock_create(); in the latter case everything
 * created so far is undone through tcp_sk_exit().
 */
static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res) {
			tcp_sk_exit(net);
			return res;
		}
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	/* ECN */
	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	/* MSS and path-MTU probing */
	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	/* Keepalive */
	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	/* Connection setup, retries and teardown */
	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;

	/* Limits derived from the size of the established hash table */
	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);

	/* Options, loss recovery and window handling */
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;

	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of four TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;

	/* Namespaces other than init_net copy init_net's buffer limits. */
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}

	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;

	/* TCP Fast Open */
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Inherit init_net's congestion control when a reference on its
	 * module can be taken; otherwise use Reno, which is always built in.
	 */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
}
|
|
|
|
/*
 * Batched per-netns exit: purge AF_INET time-wait sockets from
 * tcp_hashinfo once, then destroy the fastopen context of every
 * namespace on @net_exit_list.
 */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list) {
		tcp_fastopen_ctx_destroy(net);
	}
}
|
|
|
|
/* Per-netns hooks for the TCP control sockets and sysctl defaults. */
static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init		= tcp_sk_init,
	.exit		= tcp_sk_exit,
	.exit_batch	= tcp_sk_exit_batch,
};
|
|
|
|
/*
 * Boot-time initialisation: register the per-netns TCP hooks.  Failure to
 * register is treated as fatal and panics the kernel.
 */
void __init tcp_v4_init(void)
{
	int err = register_pernet_subsys(&tcp_sk_ops);

	if (err)
		panic("Failed to create the TCP control socket.\n");
}
|