Merge android-4.19.95 (5da1114) into msm-4.19

* refs/heads/tmp-5da1114:
  Revert crypto changes from android-4.19.79-95
  Revert "UPSTREAM: PM / wakeup updates"
  Revert "ANDROID: of: property: Enable of_devlink by default"
  Revert "UPSTREAM: dt-bindings: arm: coresight: Add support for coresight-loses-context-with-cpu"
  UPSTREAM: net: usbnet: Fix -Wcast-function-type
  UPSTREAM: USB: dummy-hcd: use usb_urb_dir_in instead of usb_pipein
  UPSTREAM: USB: dummy-hcd: increase max number of devices to 32
  ANDROID: tty: serdev: Fix broken serial console input
  ANDROID: update kernel ABI (perf_event changes)
  BACKPORT: perf_event: Add support for LSM and SELinux checks
  UPSTREAM: iommu: Allow io-pgtable to be used outside of drivers/iommu/
  ANDROID: update abi for 4.19.94 release
  ANDROID: update abi due to revert
  Revert "BACKPORT: perf_event: Add support for LSM and SELinux checks"
  UPSTREAM: selinux: sidtab reverse lookup hash table
  UPSTREAM: selinux: avoid atomic_t usage in sidtab
  UPSTREAM: selinux: check sidtab limit before adding a new entry
  UPSTREAM: selinux: fix context string corruption in convert_context()
  UPSTREAM: selinux: overhaul sidtab to fix bug and improve performance
  UPSTREAM: selinux: refactor mls_context_to_sid() and make it stricter
  UPSTREAM: selinux: use separate table for initial SID lookup
  UPSTREAM: selinux: make "selinux_policycap_names[]" const char *
  UPSTREAM: selinux: refactor sidtab conversion
  ANDROID: Update ABI representation
  ANDROID: GKI: clk: Don't disable unused clocks with sync state support
  ANDROID: GKI: clk: Add support for clock providers with sync state
  ANDROID: GKI: driver core: Add dev_has_sync_state()
  ANDROID: update kernel ABI representation
  BACKPORT: perf_event: Add support for LSM and SELinux checks
  ANDROID: update ABI representation
  UPSTREAM: exit: panic before exit_mm() on global init exit
  ANDROID: serdev: Fix platform device support
  ANDROID: Kconfig.gki: Add Hidden SPRD DRM configs
  ANDROID: gki_defconfig: Disable TRANSPARENT_HUGEPAGE
  ANDROID: gki_defconfig: Enable CONFIG_GNSS_CMDLINE_SERIAL
  ANDROID: gnss: Add command line test driver
  ANDROID: serdev: add platform device support
  ANDROID: gki_defconfig: enable ARM64_SW_TTBR0_PAN
  ANDROID: gki_defconfig: Set BINFMT_MISC as =m
  UPSTREAM: binder: fix incorrect calculation for num_valid
  ABI: Update ABI after f2fs merge
  ANDROID: add initial ABI whitelist for android-4.19
  ANDROID: staging: android: ion: Fix build when CONFIG_ION_SYSTEM_HEAP=n
  ANDROID: staging: android: ion: Expose total heap and pool sizes via sysfs
  ANDROID: Update ABI representation due to vmstat counter changes
  UPSTREAM: include/linux/slab.h: fix sparse warning in kmalloc_type()
  UPSTREAM: mm, slab: shorten kmalloc cache names for large sizes
  UPSTREAM: mm, proc: add KReclaimable to /proc/meminfo
  UPSTREAM: mm: rename and change semantics of nr_indirectly_reclaimable_bytes
  UPSTREAM: dcache: allocate external names from reclaimable kmalloc caches
  UPSTREAM: mm, slab/slub: introduce kmalloc-reclaimable caches
  UPSTREAM: mm, slab: combine kmalloc_caches and kmalloc_dma_caches
  ANDROID: abi update for 4.19.89
  ANDROID: update abi_gki_aarch64.xml for LTO, CFI, and SCS
  ANDROID: gki_defconfig: enable LTO, CFI, and SCS
  ANDROID: update abi_gki_aarch64.xml for CONFIG_GNSS
  ANDROID: cuttlefish_defconfig: Enable CONFIG_GNSS
  UPSTREAM: arm64: Validate tagged addresses in access_ok() called from kernel threads
  ANDROID: mm: Throttle rss_stat tracepoint
  UPSTREAM: mm: slub: really fix slab walking for init_on_free
  ANDROID: update abi_gki_aarch64.xml for nf change
  ANDROID: kbuild: limit LTO inlining
  ANDROID: kbuild: merge module sections with LTO
  ANDROID: netfilter: nf_nat: remove static from nf_nat_ipv4_fn
  UPSTREAM: drm/client: remove the exporting of drm_client_close
  ANDROID: f2fs: fix possible merge of unencrypted with encrypted I/O
  UPSTREAM: binder: Add binder_proc logging to binderfs
  UPSTREAM: binder: Make transaction_log available in binderfs
  UPSTREAM: binder: Add stats, state and transactions files
  UPSTREAM: binder: add a mount option to show global stats
  UPSTREAM: binder: Validate the default binderfs device names.
  UPSTREAM: binder: Add default binder devices through binderfs when configured
  UPSTREAM: binder: fix CONFIG_ANDROID_BINDER_DEVICES
  UPSTREAM: android: binder: use kstrdup instead of open-coding it
  UPSTREAM: binderfs: remove separate device_initcall()
  UPSTREAM: binderfs: respect limit on binder control creation
  UPSTREAM: binderfs: switch from d_add() to d_instantiate()
  UPSTREAM: binderfs: drop lock in binderfs_binder_ctl_create
  UPSTREAM: binderfs: kill_litter_super() before cleanup
  UPSTREAM: binderfs: rework binderfs_binder_device_create()
  UPSTREAM: binderfs: rework binderfs_fill_super()
  UPSTREAM: binderfs: prevent renaming the control dentry
  UPSTREAM: binderfs: remove outdated comment
  UPSTREAM: binderfs: fix error return code in binderfs_fill_super()
  UPSTREAM: binderfs: handle !CONFIG_IPC_NS builds
  UPSTREAM: binderfs: reserve devices for initial mount
  UPSTREAM: binderfs: rename header to binderfs.h
  UPSTREAM: binderfs: implement "max" mount option
  UPSTREAM: binderfs: make each binderfs mount a new instance
  UPSTREAM: binderfs: remove wrong kern_mount() call
  UPSTREAM: binder: implement binderfs
  UPSTREAM: binder: remove BINDER_DEBUG_ENTRY()
  ANDROID: Don't base allmodconfig on gki_defconfig
  ANDROID: Disable UNWINDER_ORC for allmodconfig
  ANDROID: update abi_gki_aarch64.xml for 4.19.87
  BACKPORT: ARM: 8905/1: Emit __gnu_mcount_nc when using Clang 10.0.0 or newer
  ANDROID: update abi_gki_aarch64.xml
  ANDROID: gki_defconfig: =m's applied for virtio configs in arm64
  UPSTREAM: of: property: Add device link support for interrupt-parent, dmas and -gpio(s)
  UPSTREAM: of: property: Add device link support for "iommu-map"
  UPSTREAM: of: property: Fix the semantics of of_is_ancestor_of()
  UPSTREAM: i2c: of: Populate fwnode in of_i2c_get_board_info()
  UPSTREAM: driver core: Clarify documentation for fwnode_operations.add_links()
  UPSTREAM: dt-bindings: arm: coresight: Add support for coresight-loses-context-with-cpu
  BACKPORT: coresight: etm4x: Save/restore state across CPU low power states
  ANDROID: Update ABI representation
  ANDROID: gki_defconfig: IIO=y
  f2fs: stop GC when the victim becomes fully valid
  f2fs: expose main_blkaddr in sysfs
  f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project()
  f2fs: Fix deadlock in f2fs_gc() context during atomic files handling
  f2fs: show f2fs instance in printk_ratelimited
  f2fs: fix potential overflow
  f2fs: fix to update dir's i_pino during cross_rename
  f2fs: support aligned pinned file
  f2fs: avoid kernel panic on corruption test
  f2fs: fix wrong description in document
  f2fs: cache global IPU bio
  f2fs: fix to avoid memory leakage in f2fs_listxattr
  f2fs: check total_segments from devices in raw_super
  f2fs: update multi-dev metadata in resize_fs
  f2fs: mark recovery flag correctly in read_raw_super_block()
  f2fs: fix to update time in lazytime mode
  vfs: don't allow writes to swap files
  mm: set S_SWAPFILE on blockdev swap devices
  BACKPORT: ARM: 8900/1: UNWINDER_FRAME_POINTER implementation for Clang
  ANDROID: update abi_gki_aarch64.xml for 4.19.87
  ANDROID: gki_defconfig: FW_CACHE to no
  FROMGIT: firmware_class: make firmware caching configurable
  FROMLIST: arm64: implement Shadow Call Stack
  FROMLIST: arm64: disable SCS for hypervisor code
  BACKPORT: FROMLIST: arm64: vdso: disable Shadow Call Stack
  FROMLIST: arm64: efi: restore x18 if it was corrupted
  FROMLIST: arm64: preserve x18 when CPU is suspended
  FROMLIST: arm64: reserve x18 from general allocation with SCS
  FROMLIST: arm64: disable function graph tracing with SCS
  FROMLIST: scs: add support for stack usage debugging
  FROMLIST: scs: add accounting
  FROMLIST: add support for Clang's Shadow Call Stack (SCS)
  FROMLIST: arm64: kernel: avoid x18 in __cpu_soft_restart
  FROMLIST: arm64: kvm: stop treating register x18 as caller save
  FROMLIST: arm64/lib: copy_page: avoid x18 register in assembler code
  FROMLIST: arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings
  ANDROID: use non-canonical CFI jump tables
  ANDROID: arm64: add __nocfi to __apply_alternatives
  ANDROID: arm64: add __pa_function
  ANDROID: arm64: allow ThinLTO to be selected
  ANDROID: soc/tegra: disable ARCH_TEGRA_210_SOC with LTO
  FROMLIST: arm64: fix alternatives with LLVM's integrated assembler
  ANDROID: irqchip/gic-v3: rename gic_of_init to work around a ThinLTO+CFI bug
  ANDROID: init: ensure initcall ordering with LTO
  Revert "ANDROID: init: ensure initcall ordering with LTO"
  ANDROID: add support for ThinLTO
  ANDROID: clang: update to 10.0.1
  ANDROID: gki_defconfig: enable CONFIG_REGULATOR_FIXED_VOLTAGE
  ANDROID: gki_defconfig: removed CONFIG_PM_WAKELOCKS
  ANDROID: gki_defconfig: enable CONFIG_IKHEADERS as m
  FROMGIT: pinctrl: devicetree: Avoid taking direct reference to device name string
  ANDROID: update abi_gki_aarch64.xml for 4.19.86 update
  ANDROID: Update ABI representation
  ANDROID: gki_defconfig: disable FUNCTION_TRACER
  ANDROID: Update the ABI representation
  ANDROID: update ABI representation
  ANDROID: add unstripped modules to the distribution
  FROMLIST: vsprintf: Inline call to ptr_to_hashval
  UPSTREAM: rss_stat: Add support to detect RSS updates of external mm
  UPSTREAM: mm: emit tracepoint when RSS changes
  FROMGIT: driver core: Allow device link operations inside sync_state()
  ANDROID: uid_sys_stats: avoid double accounting of dying threads
  ANDROID: scsi: ufs-qcom: Enable BROKEN_CRYPTO quirk flag
  ANDROID: scsi: ufs-hisi: Enable BROKEN_CRYPTO quirk flag
  ANDROID: scsi: ufs: Add quirk bit for controllers that don't play well with inline crypto
  ANDROID: scsi: ufs: UFS init should not require inline crypto
  ANDROID: scsi: ufs: UFS crypto variant operations API
  ANDROID: gki_defconfig: enable inline encryption
  BACKPORT: FROMLIST: ext4: add inline encryption support
  BACKPORT: FROMLIST: f2fs: add inline encryption support
  BACKPORT: FROMLIST: fscrypt: add inline encryption support
  BACKPORT: FROMLIST: scsi: ufs: Add inline encryption support to UFS
  BACKPORT: FROMLIST: scsi: ufs: UFS crypto API
  BACKPORT: FROMLIST: scsi: ufs: UFS driver v2.1 spec crypto additions
  BACKPORT: FROMLIST: block: blk-crypto for Inline Encryption
  ANDROID: block: Fix bio_crypt_should_process WARN_ON
  BACKPORT: FROMLIST: block: Add encryption context to struct bio
  BACKPORT: FROMLIST: block: Keyslot Manager for Inline Encryption
  FROMLIST: f2fs: add support for IV_INO_LBLK_64 encryption policies
  FROMLIST: ext4: add support for IV_INO_LBLK_64 encryption policies
  BACKPORT: FROMLIST: fscrypt: add support for IV_INO_LBLK_64 policies
  FROMLIST: fscrypt: zeroize fscrypt_info before freeing
  FROMLIST: fscrypt: remove struct fscrypt_ctx
  BACKPORT: FROMLIST: fscrypt: invoke crypto API for ESSIV handling
  ANDROID: build kernels with llvm-nm and llvm-objcopy
  ANDROID: Fix allmodconfig build with CC=clang
  UPSTREAM: mm/page_poison: expose page_poisoning_enabled to kernel modules
  FROMGIT: of: property: Add device link support for iommus, mboxes and io-channels
  FROMGIT: of: property: Make it easy to add device links from DT properties
  FROMGIT: of: property: Minor style clean up of of_link_to_phandle()
  Revert "ANDROID: of/property: Add device link support for iommus"
  ANDROID: Add allmodconfig build.configs for x86_64 and aarch64
  ANDROID: fix allmodconfig build
  ANDROID: nf: IDLETIMER: Fix possible use before initialization in idletimer_resume
  BACKPORT: coresight: funnel: Support static funnel
  BACKPORT:FROMGIT: coresight: replicator: Fix missing spin_lock_init()
  BACKPORT:FROMGIT: coresight: funnel: Fix missing spin_lock_init()
  BACKPORT:FROMGIT: coresight: Serialize enabling/disabling a link device.
  UPSTREAM: coresight: tmc-etr: Add barrier packets when moving offset forward
  UPSTREAM: coresight: tmc-etr: Decouple buffer sync and barrier packet insertion
  UPSTREAM: coresight: tmc: Make memory width mask computation into a function
  UPSTREAM: coresight: tmc-etr: Fix perf_data check
  UPSTREAM: coresight: tmc-etr: Fix updating buffer in not-snapshot mode.
  UPSTREAM: coresight: tmc-etr: Check if non-secure access is enabled
  UPSTREAM: coresight: tmc-etr: Handle memory errors
  BACKPORT: coresight: etr_buf: Consolidate refcount initialization
  UPSTREAM: coresight: Fix DEBUG_LOCKS_WARN_ON for uninitialized attribute
  UPSTREAM: coresight: Use coresight device names for sinks in PMU attribute
  UPSTREAM: coresight: tmc-etr: alloc_perf_buf: Do not call smp_processor_id from preemptible
  UPSTREAM: coresight: tmc-etr: Do not call smp_processor_id() from preemptible
  UPSTREAM: coresight: perf: Don't set the truncated flag in snapshot mode
  UPSTREAM: coresight: tmc-etf: Fix snapshot mode update function
  UPSTREAM: coresight: tmc-etr: Properly set AUX buffer head in snapshot mode
  UPSTREAM: coresight: tmc-etr: Add support for CPU-wide trace scenarios
  UPSTREAM: coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios
  UPSTREAM: coresight: tmc-etr: Introduce the notion of IDR to ETR devices
  UPSTREAM: coresight: tmc-etr: Introduce the notion of reference counting to ETR devices
  UPSTREAM: coresight: tmc-etr: Introduce the notion of process ID to ETR devices
  UPSTREAM: coresight: tmc-etr: Create per-thread buffer allocation function
  UPSTREAM: coresight: tmc-etr: Refactor function tmc_etr_setup_perf_buf()
  UPSTREAM: coresight: Communicate perf event to sink buffer allocation functions
  UPSTREAM: coresight: perf: Refactor function free_event_data()
  UPSTREAM: coresight: perf: Clean up function etm_setup_aux()
  UPSTREAM: coresight: Properly address concurrency in sink::update() functions
  UPSTREAM: coresight: Properly address errors in sink::disable() functions
  UPSTREAM: coresight: Move reference counting inside sink drivers
  UPSTREAM: coresight: Adding return code to sink::disable() operation
  UPSTREAM: coresight: etm4x: Configure tracers to emit timestamps
  UPSTREAM: coresight: etm4x: Skip selector pair 0
  UPSTREAM: coresight: etm4x: Add kernel configuration for CONTEXTID
  UPSTREAM: coresight: pmu: Adding ITRACE property to cs_etm PMU
  UPSTREAM: coresight: tmc: Cleanup power management
  UPSTREAM: coresight: Fix freeing up the coresight connections
  UPSTREAM: coresight: tmc: Report DMA setup failures
  UPSTREAM: coresight: catu: fix clang build warning
  UPSTREAM: perf/core: Fix the address filtering fix
  UPSTREAM: perf, pt, coresight: Fix address filters for vmas with non-zero offset
  UPSTREAM: perf: Copy parent's address filter offsets on clone
  UPSTREAM: coresight: Use event attributes for sink selection
  UPSTREAM: coresight: perf: Add "sinks" group to PMU directory
  UPSTREAM: coresight: etb10: Add support for CLAIM tag
  UPSTREAM: coreisght: tmc: Claim device before use
  UPSTREAM: coresight: dynamic-replicator: Claim device for use
  UPSTREAM: coresight: funnel: Claim devices before use
  UPSTREAM: coresight: etmx: Claim devices before use
  UPSTREAM: coresight: Add support for CLAIM tag protocol
  UPSTREAM: coresight: dynamic-replicator: Handle multiple connections
  UPSTREAM: coresight: etb10: Handle errors enabling the device
  UPSTREAM: coresight: etm3: Add support for handling errors
  UPSTREAM: coresight: etm4x: Add support for handling errors
  UPSTREAM: coresight: tmc-etb/etf: Prepare to handle errors enabling
  UPSTREAM: coresight: tmc-etr: Handle errors enabling CATU
  UPSTREAM: coresight: tmc-etr: Refactor for handling errors
  UPSTREAM: coresight: Handle failures in enabling a trace path
  UPSTREAM: coresight: tmc: Fix byte-address alignment for RRP
  UPSTREAM: coresight: etm4x: Configure EL2 exception level when kernel is running in HYP
  UPSTREAM: coresight: etb10: Splitting function etb_enable()
  UPSTREAM: coresight: etb10: Refactor etb_drvdata::mode handling
  UPSTREAM: coresight: etm-perf: Add support for ETR backend
  UPSTREAM: coresight: perf: Remove set_buffer call back
  UPSTREAM: coresight: perf: Add helper to retrieve sink configuration
  UPSTREAM: coresight: perf: Remove reset_buffer call back for sinks
  UPSTREAM: coresight: Convert driver messages to dev_dbg
  UPSTREAM: coresight: tmc-etr: Relax collection of trace from sysfs mode
  UPSTREAM: coresight: tmc-etr: Handle driver mode specific ETR buffers
  UPSTREAM: coresight: perf: Disable trace path upon source error
  UPSTREAM: coresight: perf: Allow tracing on hotplugged CPUs
  UPSTREAM: coresight: perf: Avoid unncessary CPU hotplug read lock
  UPSTREAM: coresight: perf: Fix per cpu path management
  UPSTREAM: coresight: Fix handling of sinks
  UPSTREAM: coresight: Use ERR_CAST instead of ERR_PTR
  UPSTREAM: coresight: Fix remote endpoint parsing
  UPSTREAM: coresight: platform: Fix leaking device reference
  UPSTREAM: coresight: platform: Fix refcounting for graph nodes
  UPSTREAM: coresight: platform: Refactor graph endpoint parsing
  UPSTREAM: coresight: Document error handling in coresight_register
  ANDROID: regression introduced override_creds=off
  ANDROID: overlayfs: internal getxattr operations without sepolicy checking
  ANDROID: overlayfs: add __get xattr method
  ANDROID: Add optional __get xattr method paired to __vfs_getxattr
  UPSTREAM: scsi: ufs: override auto suspend tunables for ufs
  UPSTREAM: scsi: core: allow auto suspend override by low-level driver
  FROMGIT: of: property: Skip adding device links to suppliers that aren't devices
  ANDROID: gki_defconfig: enable CONFIG_KEYBOARD_GPIO
  UPSTREAM: dm bufio: introduce a global cache replacement
  UPSTREAM: dm bufio: remove old-style buffer cleanup
  UPSTREAM: dm bufio: introduce a global queue
  UPSTREAM: dm bufio: refactor adjust_total_allocated
  UPSTREAM: dm bufio: call adjust_total_allocated from __link_buffer and __unlink_buffer
  ANDROID: dummy_cpufreq: Implement get()
  ANDROID: gki_defconfig: enable CONFIG_CPUSETS
  ANDROID: virtio: virtio_input: Set the amount of multitouch slots in virtio input
  rtlwifi: Fix potential overflow on P2P code
  ANDROID: cpufreq: create dummy cpufreq driver
  ANDROID: Allow DRM_IOCTL_MODE_*_DUMB for render clients.
  Cuttlefish Wifi: Add data ops in virt_wifi driver for scan data simulation
  ANDROID: of: property: Enable of_devlink by default
  ANDROID: of: property: Make sure child dependencies don't block probing of parent
  ANDROID: driver core: Allow fwnode_operations.add_links to differentiate errors
  ANDROID: driver core: Allow a device to wait on optional suppliers
  ANDROID: driver core: Add device link support for SYNC_STATE_ONLY flag
  FROMGIT: docs: driver-model: Add documentation for sync_state
  FROMGIT: driver: core: Improve documentation for fwnode_operations.add_links()
  FROMGIT: of: property: Minor code formatting/style clean ups
  ANDROID: of/property: Add device link support for iommus
  ANDROID: move up spin_unlock_bh() ahead of remove_proc_entry()
  BACKPORT: arm64: tags: Preserve tags for addresses translated via TTBR1
  UPSTREAM: arm64: memory: Implement __tag_set() as common function
  UPSTREAM: arm64/mm: fix variable 'tag' set but not used
  UPSTREAM: arm64: avoid clang warning about self-assignment
  ANDROID: sdcardfs: evict dentries on fscrypt key removal
  ANDROID: fscrypt: add key removal notifier chain
  ANDROID: refactor build.config files to remove duplication
  ANDROID: Move from clang r353983c to r365631c
  ANDROID: gki_defconfig: remove PWRSEQ_EMMC and PWRSEQ_SIMPLE
  ANDROID: unconditionally compile sig_ok in struct module
  ANDROID: gki_defconfig: enable fs-verity
  UPSTREAM: mm: vmalloc: show number of vmalloc pages in /proc/meminfo
  BACKPORT: PM/sleep: Expose suspend stats in sysfs
  UPSTREAM: power: supply: Init device wakeup after device_add()
  UPSTREAM: PM / wakeup: Unexport wakeup_source_sysfs_{add,remove}()
  UPSTREAM: PM / wakeup: Register wakeup class kobj after device is added
  UPSTREAM: PM / wakeup: Fix sysfs registration error path
  UPSTREAM: PM / wakeup: Show wakeup sources stats in sysfs
  UPSTREAM: PM / wakeup: Use wakeup_source_register() in wakelock.c
  UPSTREAM: PM / wakeup: Drop wakeup_source_init(), wakeup_source_prepare()
  UPSTREAM: PM / wakeup: Drop wakeup_source_drop()
  UPSTREAM: PM / core: Add support to skip power management in device/driver model
  gki_defconfig: Enable CONFIG_DM_SNAPSHOT
  ANDROID: gki_defconfig: enable accelerated AES and SHA-256
  ANDROID: fix overflow in /proc/uid_cputime/remove_uid_range
  ANDROID: kasan: fix has_attribute check on older GCC versions
  ANDROID: gki_defconfig: enable CONFIG_PARAVIRT and CONFIG_HYPERVISOR_GUEST
  ANDROID: gki_defconfig: enable CONFIG_NLS_*
  ANDROID: gki_defconfig: Enable BPF_JIT and BPF_JIT_ALWAYS_ON
  FROMGIT: of: property: Create device links for all child-supplier depencencies
  FROMGIT: of/platform: Pause/resume sync state during init and of_platform_populate()
  BACKPORT: FROMGIT: driver core: Add sync_state driver/bus callback
  BACKPORT: FROMGIT: of: property: Add functional dependency link from DT bindings
  FROMGIT: driver core: Add support for linking devices during device addition
  FROMGIT: driver core: Add fwnode_to_dev() to look up device from fwnode
  UPSTREAM: mm: untag user pointers in mmap/munmap/mremap/brk
  UPSTREAM: vfio/type1: untag user pointers in vaddr_get_pfn
  UPSTREAM: tee/shm: untag user pointers in tee_shm_register
  UPSTREAM: media/v4l2-core: untag user pointers in videobuf_dma_contig_user_get
  UPSTREAM: drm/radeon: untag user pointers in radeon_gem_userptr_ioctl
  BACKPORT: drm/amdgpu: untag user pointers
  UPSTREAM: userfaultfd: untag user pointers
  UPSTREAM: fs/namespace: untag user pointers in copy_mount_options
  UPSTREAM: mm: untag user pointers in get_vaddr_frames
  UPSTREAM: mm: untag user pointers in mm/gup.c
  UPSTREAM: mm: untag user pointers passed to memory syscalls
  BACKPORT: lib: untag user pointers in strn*_user
  UPSTREAM: arm64: Fix reference to docs for ARM64_TAGGED_ADDR_ABI
  UPSTREAM: selftests, arm64: add kernel headers path for tags_test
  BACKPORT: arm64: Relax Documentation/arm64/tagged-pointers.rst
  UPSTREAM: arm64: Define Documentation/arm64/tagged-address-abi.rst
  UPSTREAM: arm64: Change the tagged_addr sysctl control semantics to only prevent the opt-in
  UPSTREAM: arm64: Tighten the PR_{SET, GET}_TAGGED_ADDR_CTRL prctl() unused arguments
  UPSTREAM: selftests, arm64: fix uninitialized symbol in tags_test.c
  UPSTREAM: arm64: mm: Really fix sparse warning in untagged_addr()
  UPSTREAM: selftests, arm64: add a selftest for passing tagged pointers to kernel
  BACKPORT: arm64: Introduce prctl() options to control the tagged user addresses ABI
  UPSTREAM: arm64: untag user pointers in access_ok and __uaccess_mask_ptr
  UPSTREAM: uaccess: add noop untagged_addr definition
  BACKPORT: block: annotate refault stalls from IO submission
  f2fs: add a condition to detect overflow in f2fs_ioc_gc_range()
  f2fs: fix to add missing F2FS_IO_ALIGNED() condition
  f2fs: fix to fallback to buffered IO in IO aligned mode
  f2fs: fix to handle error path correctly in f2fs_map_blocks
  f2fs: fix extent corrupotion during directIO in LFS mode
  f2fs: check all the data segments against all node ones
  f2fs: Add a small clarification to CONFIG_FS_F2FS_FS_SECURITY
  f2fs: fix inode rwsem regression
  f2fs: fix to avoid accessing uninitialized field of inode page in is_alive()
  f2fs: avoid infinite GC loop due to stale atomic files
  f2fs: Fix indefinite loop in f2fs_gc()
  f2fs: convert inline_data in prior to i_size_write
  f2fs: fix error path of f2fs_convert_inline_page()
  f2fs: add missing documents of reserve_root/resuid/resgid
  f2fs: fix flushing node pages when checkpoint is disabled
  f2fs: enhance f2fs_is_checkpoint_ready()'s readability
  f2fs: clean up __bio_alloc()'s parameter
  f2fs: fix wrong error injection path in inc_valid_block_count()
  f2fs: fix to writeout dirty inode during node flush
  f2fs: optimize case-insensitive lookups
  f2fs: introduce f2fs_match_name() for cleanup
  f2fs: Fix indefinite loop in f2fs_gc()
  f2fs: allocate memory in batch in build_sit_info()
  f2fs: support FS_IOC_{GET,SET}FSLABEL
  f2fs: fix to avoid data corruption by forbidding SSR overwrite
  f2fs: Fix build error while CONFIG_NLS=m
  Revert "f2fs: avoid out-of-range memory access"
  f2fs: cleanup the code in build_sit_entries.
  f2fs: fix wrong available node count calculation
  f2fs: remove duplicate code in f2fs_file_write_iter
  f2fs: fix to migrate blocks correctly during defragment
  f2fs: use wrapped f2fs_cp_error()
  f2fs: fix to use more generic EOPNOTSUPP
  f2fs: use wrapped IS_SWAPFILE()
  f2fs: Support case-insensitive file name lookups
  f2fs: include charset encoding information in the superblock
  fs: Reserve flag for casefolding
  f2fs: fix to avoid call kvfree under spinlock
  fs: f2fs: Remove unnecessary checks of SM_I(sbi) in update_general_status()
  f2fs: disallow direct IO in atomic write
  f2fs: fix to handle quota_{on,off} correctly
  f2fs: fix to detect cp error in f2fs_setxattr()
  f2fs: fix to spread f2fs_is_checkpoint_ready()
  f2fs: support fiemap() for directory inode
  f2fs: fix to avoid discard command leak
  f2fs: fix to avoid tagging SBI_QUOTA_NEED_REPAIR incorrectly
  f2fs: fix to drop meta/node pages during umount
  f2fs: disallow switching io_bits option during remount
  f2fs: fix panic of IO alignment feature
  f2fs: introduce {page,io}_is_mergeable() for readability
  f2fs: fix livelock in swapfile writes
  f2fs: add fs-verity support
  ext4: update on-disk format documentation for fs-verity
  ext4: add fs-verity read support
  ext4: add basic fs-verity support
  fs-verity: support builtin file signatures
  fs-verity: add SHA-512 support
  fs-verity: implement FS_IOC_MEASURE_VERITY ioctl
  fs-verity: implement FS_IOC_ENABLE_VERITY ioctl
  fs-verity: add data verification hooks for ->readpages()
  fs-verity: add the hook for file ->setattr()
  fs-verity: add the hook for file ->open()
  fs-verity: add inode and superblock fields
  fs-verity: add Kconfig and the helper functions for hashing
  fs: uapi: define verity bit for FS_IOC_GETFLAGS
  fs-verity: add UAPI header
  fs-verity: add MAINTAINERS file entry
  fs-verity: add a documentation file
  ext4: fix kernel oops caused by spurious casefold flag
  ext4: fix coverity warning on error path of filename setup
  ext4: optimize case-insensitive lookups
  ext4: fix dcache lookup of !casefolded directories
  unicode: update to Unicode 12.1.0 final
  unicode: add missing check for an error return from utf8lookup()
  ext4: export /sys/fs/ext4/feature/casefold if Unicode support is present
  unicode: refactor the rule for regenerating utf8data.h
  ext4: Support case-insensitive file name lookups
  ext4: include charset encoding information in the superblock
  unicode: update unicode database unicode version 12.1.0
  unicode: introduce test module for normalized utf8 implementation
  unicode: implement higher level API for string handling
  unicode: reduce the size of utf8data[]
  unicode: introduce code for UTF-8 normalization
  unicode: introduce UTF-8 character database
  ext4 crypto: fix to check feature status before get policy
  fscrypt: document the new ioctls and policy version
  ubifs: wire up new fscrypt ioctls
  f2fs: wire up new fscrypt ioctls
  ext4: wire up new fscrypt ioctls
  fscrypt: require that key be added when setting a v2 encryption policy
  fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl
  fscrypt: allow unprivileged users to add/remove keys for v2 policies
  fscrypt: v2 encryption policy support
  fscrypt: add an HKDF-SHA512 implementation
  fscrypt: add FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl
  fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl
  fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
  fscrypt: rename keyinfo.c to keysetup.c
  fscrypt: move v1 policy key setup to keysetup_v1.c
  fscrypt: refactor key setup code in preparation for v2 policies
  fscrypt: rename fscrypt_master_key to fscrypt_direct_key
  fscrypt: add ->ci_inode to fscrypt_info
  fscrypt: use FSCRYPT_* definitions, not FS_*
  fscrypt: use FSCRYPT_ prefix for uapi constants
  fs, fscrypt: move uapi definitions to new header <linux/fscrypt.h>
  fscrypt: use ENOPKG when crypto API support missing
  fscrypt: improve warnings for missing crypto API support
  fscrypt: improve warning messages for unsupported encryption contexts
  fscrypt: make fscrypt_msg() take inode instead of super_block
  fscrypt: clean up base64 encoding/decoding
  fscrypt: remove loadable module related code

Updated following files to fix build errors:
	drivers/gpu/msm/kgsl_pool.c
	drivers/hwtracing/coresight/coresight-dummy.c
	drivers/iommu/dma-mapping-fast.c
	drivers/iommu/io-pgtable-fast.c
	drivers/iommu/io-pgtable-msm-secure.c
	kernel/taskstats.c
	mm/vmalloc.c
	security/selinux/ss/sidtab.h

Conflicts:
	arch/arm/Makefile
	arch/arm64/Kconfig
	arch/x86/include/asm/syscall_wrapper.h
	build.config.common
	drivers/clk/clk.c
	drivers/hwtracing/coresight/coresight-etm-perf.c
	drivers/hwtracing/coresight/coresight-funnel.c
	drivers/hwtracing/coresight/coresight-tmc-etf.c
	drivers/hwtracing/coresight/coresight-tmc-etr.c
	drivers/hwtracing/coresight/coresight-tmc.c
	drivers/hwtracing/coresight/coresight-tmc.h
	drivers/hwtracing/coresight/coresight.c
	drivers/hwtracing/coresight/of_coresight.c
	drivers/iommu/arm-smmu.c
	drivers/iommu/io-pgtable-arm.c
	drivers/iommu/io-pgtable.c
	drivers/scsi/scsi_sysfs.c
	drivers/scsi/sd.c
	drivers/scsi/ufs/ufshcd.c
	drivers/scsi/ufs/ufshcd.h
	drivers/staging/android/ion/ion.c
	drivers/staging/android/ion/ion.h
	drivers/staging/android/ion/ion_page_pool.c
	fs/ext4/readpage.c
	fs/f2fs/data.c
	fs/f2fs/f2fs.h
	fs/f2fs/file.c
	fs/f2fs/segment.c
	fs/f2fs/super.c
	include/linux/clk-provider.h
	include/linux/compiler_types.h
	include/linux/coresight.h
	include/linux/mmzone.h
	include/scsi/scsi_device.h
	include/trace/events/kmem.h
	kernel/events/core.c
	kernel/sched/core.c
	mm/vmstat.c

Change-Id: I2eca52b08b484f2b5c30437671cab8cb0195b8d6
Signed-off-by: Ivaylo Georgiev <irgeorgiev@codeaurora.org>
This commit is contained in:
Ivaylo Georgiev 2020-03-25 23:32:38 -07:00
commit 4c30d46517
314 changed files with 73850 additions and 169578 deletions

View file

@ -31,6 +31,12 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of segment discard commands.
What: /sys/fs/f2fs/<disk>/main_blkaddr
Date: November 2019
Contact: "Ramon Pantin" <pantin@google.com>
Description:
Shows first block address of MAIN area.
What: /sys/fs/f2fs/<disk>/ipu_policy
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@ -251,3 +257,10 @@ Description:
If checkpoint=disable, it displays the number of blocks that are unusable.
If checkpoint=enable it displays the number of blocks that would be unusable
if checkpoint=disable were to be set.
What: /sys/fs/f2fs/<disk>/encoding
Date: July 2019
Contact: "Daniel Rosenberg" <drosen@google.com>
Description:
Displays name and version of the encoding set for the filesystem.
If no encoding is set, displays (none)

View file

@ -0,0 +1,27 @@
What: /sys/kernel/ion
Date: Dec 2019
KernelVersion: 4.14.158
Contact: Suren Baghdasaryan <surenb@google.com>,
Sandeep Patil <sspatil@google.com>
Description:
The /sys/kernel/ion directory contains a snapshot of the
internal state of ION memory heaps and pools.
Users: kernel memory tuning tools
What: /sys/kernel/ion/total_heaps_kb
Date: Dec 2019
KernelVersion: 4.14.158
Contact: Suren Baghdasaryan <surenb@google.com>,
Sandeep Patil <sspatil@google.com>
Description:
The total_heaps_kb file is read-only and specifies how much
memory in Kb is allocated to ION heaps.
What: /sys/kernel/ion/total_pools_kb
Date: Dec 2019
KernelVersion: 4.14.158
Contact: Suren Baghdasaryan <surenb@google.com>,
Sandeep Patil <sspatil@google.com>
Description:
The total_pools_kb file is read-only and specifies how much
memory in Kb is allocated to ION pools.

View file

@ -300,4 +300,110 @@ Description:
attempt.
Using this sysfs file will override any values that were
set using the kernel command line for disk offset.
set using the kernel command line for disk offset.
What: /sys/power/suspend_stats
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats directory contains suspend related
statistics.
What: /sys/power/suspend_stats/success
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/success file contains the number
of times entering system sleep state succeeded.
What: /sys/power/suspend_stats/fail
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/fail file contains the number
of times entering system sleep state failed.
What: /sys/power/suspend_stats/failed_freeze
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_freeze file contains the
number of times freezing processes failed.
What: /sys/power/suspend_stats/failed_prepare
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_prepare file contains the
number of times preparing all non-sysdev devices for
a system PM transition failed.
What: /sys/power/suspend_stats/failed_resume
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_resume file contains the
number of times executing "resume" callbacks of
non-sysdev devices failed.
What: /sys/power/suspend_stats/failed_resume_early
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_resume_early file contains
the number of times executing "early resume" callbacks
of devices failed.
What: /sys/power/suspend_stats/failed_resume_noirq
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_resume_noirq file contains
the number of times executing "noirq resume" callbacks
of devices failed.
What: /sys/power/suspend_stats/failed_suspend
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_suspend file contains
the number of times executing "suspend" callbacks
of all non-sysdev devices failed.
What: /sys/power/suspend_stats/failed_suspend_late
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_suspend_late file contains
the number of times executing "late suspend" callbacks
of all devices failed.
What: /sys/power/suspend_stats/failed_suspend_noirq
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/failed_suspend_noirq file contains
the number of times executing "noirq suspend" callbacks
of all devices failed.
What: /sys/power/suspend_stats/last_failed_dev
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/last_failed_dev file contains
the last device for which a suspend/resume callback failed.
What: /sys/power/suspend_stats/last_failed_errno
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/last_failed_errno file contains
the errno of the last failed attempt at entering
system sleep state.
What: /sys/power/suspend_stats/last_failed_step
Date: July 2019
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
Description:
The /sys/power/suspend_stats/last_failed_step file contains
the last failed step in the suspend/resume path.

View file

@ -126,6 +126,7 @@ parameter is applicable::
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
NFS Appropriate NFS support is enabled.
OF Devicetree is enabled.
OSS OSS sound support is enabled.
PV_OPS A paravirtualized kernel is enabled.
PARIDE The ParIDE (parallel port IDE) subsystem is enabled.

View file

@ -3118,6 +3118,12 @@
This can be set from sysctl after boot.
See Documentation/sysctl/vm.txt for details.
of_devlink [OF, KNL] Create device links between consumer and
supplier devices by scanning the devicetree to infer the
consumer/supplier relationships. A consumer device
will not be probed until all the supplier devices have
probed successfully.
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
See Documentation/debugging-via-ohci1394.txt for more
info.

View file

@ -0,0 +1,156 @@
==========================
AArch64 TAGGED ADDRESS ABI
==========================
Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
Catalin Marinas <catalin.marinas@arm.com>
Date: 21 August 2019
This document describes the usage and semantics of the Tagged Address
ABI on AArch64 Linux.
1. Introduction
---------------
On AArch64 the ``TCR_EL1.TBI0`` bit is set by default, allowing
userspace (EL0) to perform memory accesses through 64-bit pointers with
a non-zero top byte. This document describes the relaxation of the
syscall ABI that allows userspace to pass certain tagged pointers to
kernel syscalls.
2. AArch64 Tagged Address ABI
-----------------------------
From the kernel syscall interface perspective and for the purposes of
this document, a "valid tagged pointer" is a pointer with a potentially
non-zero top-byte that references an address in the user process address
space obtained in one of the following ways:
- ``mmap()`` syscall where either:
- flags have the ``MAP_ANONYMOUS`` bit set or
- the file descriptor refers to a regular file (including those
returned by ``memfd_create()``) or ``/dev/zero``
- ``brk()`` syscall (i.e. the heap area between the initial location of
the program break at process creation and its current location).
- any memory mapped by the kernel in the address space of the process
during creation and with the same restrictions as for ``mmap()`` above
(e.g. data, bss, stack).
The AArch64 Tagged Address ABI has two stages of relaxation depending
on how the user addresses are used by the kernel:
1. User addresses not accessed by the kernel but used for address space
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
of valid tagged pointers in this context is always allowed.
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
relaxation is disabled by default and the application thread needs to
explicitly enable it via ``prctl()`` as follows:
- ``PR_SET_TAGGED_ADDR_CTRL``: enable or disable the AArch64 Tagged
Address ABI for the calling thread.
The ``(unsigned int) arg2`` argument is a bit mask describing the
control mode used:
- ``PR_TAGGED_ADDR_ENABLE``: enable AArch64 Tagged Address ABI.
Default status is disabled.
Arguments ``arg3``, ``arg4``, and ``arg5`` must be 0.
- ``PR_GET_TAGGED_ADDR_CTRL``: get the status of the AArch64 Tagged
Address ABI for the calling thread.
Arguments ``arg2``, ``arg3``, ``arg4``, and ``arg5`` must be 0.
The ABI properties described above are thread-scoped, inherited on
clone() and fork() and cleared on exec().
Calling ``prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)``
returns ``-EINVAL`` if the AArch64 Tagged Address ABI is globally
disabled by ``sysctl abi.tagged_addr_disabled=1``. The default
``sysctl abi.tagged_addr_disabled`` configuration is 0.
When the AArch64 Tagged Address ABI is enabled for a thread, the
following behaviours are guaranteed:
- All syscalls except the cases mentioned in section 3 can accept any
valid tagged pointer.
- The syscall behaviour is undefined for invalid tagged pointers: it may
result in an error code being returned, a (fatal) signal being raised,
or other modes of failure.
- The syscall behaviour for a valid tagged pointer is the same as for
the corresponding untagged pointer.
A definition of the meaning of tagged pointers on AArch64 can be found
in Documentation/arm64/tagged-pointers.rst.
3. AArch64 Tagged Address ABI Exceptions
-----------------------------------------
The following system call parameters must be untagged regardless of the
ABI relaxation:
- ``prctl()`` other than pointers to user data either passed directly or
indirectly as arguments to be accessed by the kernel.
- ``ioctl()`` other than pointers to user data either passed directly or
indirectly as arguments to be accessed by the kernel.
- ``shmat()`` and ``shmdt()``.
Any attempt to use non-zero tagged pointers may result in an error code
being returned, a (fatal) signal being raised, or other modes of
failure.
4. Example of correct usage
---------------------------
.. code-block:: c
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
#define TAG_SHIFT 56
int main(void)
{
int tbi_enabled = 0;
unsigned long tag = 0;
char *ptr;
/* check/enable the tagged address ABI */
if (!prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0))
tbi_enabled = 1;
/* memory allocation */
ptr = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
return 1;
/* set a non-zero tag if the ABI is available */
if (tbi_enabled)
tag = rand() & 0xff;
ptr = (char *)((unsigned long)ptr | (tag << TAG_SHIFT));
/* memory access to a tagged address */
strcpy(ptr, "tagged pointer\n");
/* syscall with a tagged pointer */
write(1, ptr, strlen(ptr));
return 0;
}

View file

@ -18,7 +18,9 @@ Passing tagged addresses to the kernel
--------------------------------------
All interpretation of userspace memory addresses by the kernel assumes
an address tag of 0x00.
an address tag of 0x00, unless the application enables the AArch64
Tagged Address ABI explicitly
(Documentation/arm64/tagged-address-abi.rst).
This includes, but is not limited to, addresses found in:
@ -31,13 +33,15 @@ This includes, but is not limited to, addresses found in:
- the frame pointer (x29) and frame records, e.g. when interpreting
them to generate a backtrace or call graph.
Using non-zero address tags in any of these locations may result in an
error code being returned, a (fatal) signal being raised, or other modes
of failure.
Using non-zero address tags in any of these locations when the
userspace application did not enable the AArch64 Tagged Address ABI may
result in an error code being returned, a (fatal) signal being raised,
or other modes of failure.
For these reasons, passing non-zero address tags to the kernel via
system calls is forbidden, and using a non-zero address tag for sp is
strongly discouraged.
For these reasons, when the AArch64 Tagged Address ABI is disabled,
passing non-zero address tags to the kernel via system calls is
forbidden, and using a non-zero address tag for sp is strongly
discouraged.
Programs maintaining a frame pointer and frame records that use non-zero
address tags may suffer impaired or inaccurate debug and profiling
@ -57,6 +61,9 @@ be preserved.
The architecture prevents the use of a tagged PC, so the upper byte will
be set to a sign-extension of bit 55 on exception return.
This behaviour is maintained when the AArch64 Tagged Address ABI is
enabled.
Other considerations
--------------------

View file

@ -177,6 +177,7 @@ mkprep
mkregtable
mktables
mktree
mkutf8data
modpost
modules.builtin
modules.order
@ -255,6 +256,7 @@ vsyscall_32.lds
wanxlfw.inc
uImage
unifdef
utf8data.h
wakeup.bin
wakeup.elf
wakeup.lds

View file

@ -242,7 +242,8 @@ State machine
:c:func:`driver_bound()`.)
* Before a consumer device is probed, presence of supplier drivers is
verified by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
verified by checking the consumer device is not in the wait_for_suppliers
list and by checking that links to suppliers are in ``DL_STATE_AVAILABLE``
state. The state of the links is updated to ``DL_STATE_CONSUMER_PROBE``.
(Call to :c:func:`device_links_check_suppliers()` from
:c:func:`really_probe()`.)

View file

@ -164,6 +164,49 @@ A driver's probe() may return a negative errno value to indicate that
the driver did not bind to this device, in which case it should have
released all resources it allocated.
void (*sync_state)(struct device *dev);
sync_state is called only once for a device. It's called when all the consumer
devices of the device have successfully probed. The list of consumers of the
device is obtained by looking at the device links connecting that device to its
consumer devices.
The first attempt to call sync_state() is made during late_initcall_sync() to
give firmware and drivers time to link devices to each other. During the first
attempt at calling sync_state(), if all the consumers of the device at that
point in time have already probed successfully, sync_state() is called right
away. If there are no consumers of the device during the first attempt, that
too is considered as "all consumers of the device have probed" and sync_state()
is called right away.
If during the first attempt at calling sync_state() for a device, there are
still consumers that haven't probed successfully, the sync_state() call is
postponed and reattempted in the future only when one or more consumers of the
device probe successfully. If during the reattempt, the driver core finds that
there are one or more consumers of the device that haven't probed yet, then
the sync_state() call is postponed again.
A typical use case for sync_state() is to have the kernel cleanly take over
management of devices from the bootloader. For example, if a device is left on
and at a particular hardware configuration by the bootloader, the device's
driver might need to keep the device in the boot configuration until all the
consumers of the device have probed. Once all the consumers of the device have
probed, the device's driver can synchronize the hardware state of the device to
match the aggregated software state requested by all the consumers. Hence the
name sync_state().
While obvious examples of resources that can benefit from sync_state() include
resources such as regulators, sync_state() can also be useful for complex
resources like IOMMUs. For example, IOMMUs with multiple consumers (devices
whose addresses are remapped by the IOMMU) might need to keep their mappings
fixed at (or additive to) the boot configuration until all its consumers have
probed.
While the typical use case for sync_state() is to have the kernel cleanly take
over management of devices from the bootloader, the usage of sync_state() is
not restricted to that. Use it whenever it makes sense to take an action after
all the consumers of a device have probed.
int (*remove) (struct device * dev);
remove is called to unbind a driver from a device. This may be

View file

@ -24,3 +24,4 @@ order.
.. include:: bigalloc.rst
.. include:: inlinedata.rst
.. include:: eainode.rst
.. include:: verity.rst

View file

@ -0,0 +1,41 @@
.. SPDX-License-Identifier: GPL-2.0
Verity files
------------
ext4 supports fs-verity, which is a filesystem feature that provides
Merkle tree based hashing for individual readonly files. Most of
fs-verity is common to all filesystems that support it; see
:ref:`Documentation/filesystems/fsverity.rst <fsverity>` for the
fs-verity documentation. However, the on-disk layout of the verity
metadata is filesystem-specific. On ext4, the verity metadata is
stored after the end of the file data itself, in the following format:
- Zero-padding to the next 65536-byte boundary. This padding need not
actually be allocated on-disk, i.e. it may be a hole.
- The Merkle tree, as documented in
:ref:`Documentation/filesystems/fsverity.rst
<fsverity_merkle_tree>`, with the tree levels stored in order from
root to leaf, and the tree blocks within each level stored in their
natural order.
- Zero-padding to the next filesystem block boundary.
- The verity descriptor, as documented in
:ref:`Documentation/filesystems/fsverity.rst <fsverity_descriptor>`,
with optionally appended signature blob.
- Zero-padding to the next offset that is 4 bytes before a filesystem
block boundary.
- The size of the verity descriptor in bytes, as a 4-byte little
endian integer.
Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e.
EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear.
They can have EXT4_ENCRYPT_FL set, in which case the verity metadata
is encrypted as well as the data itself.
Verity files cannot have blocks allocated past the end of the verity
metadata.

View file

@ -157,6 +157,11 @@ noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
reserve_root=%d Support configuring reserved space which is used for
allocation from a privileged user with specified uid or
gid, unit: 4KB, the default limit is 0.2% of user blocks.
resuid=%d The user ID which may use the reserved blocks.
resgid=%d The group ID which may use the reserved blocks.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
@ -292,6 +297,9 @@ Files in /sys/fs/f2fs/<devname>
reclaim the prefree segments to free segments.
By default, 5% over total # of segments.
main_blkaddr This value gives the first block address of
MAIN area in the partition.
max_small_discards This parameter controls the number of discard
commands that consist of small blocks less than 2MB.
The candidates to be discarded are cached until
@ -341,7 +349,7 @@ Files in /sys/fs/f2fs/<devname>
ram_thresh This parameter controls the memory footprint used
by free nids and cached nat entries. By default,
10 is set, which indicates 10 MB / 1 GB RAM.
1 is set, which indicates 10 MB / 1 GB RAM.
ra_nid_pages When building free nids, F2FS reads NAT blocks
ahead for speed up. Default is 0.
@ -413,6 +421,9 @@ Files in /sys/fs/f2fs/<devname>
that would be unusable if checkpoint=disable were
to be set.
encoding This shows the encoding used for casefolding.
If casefolding is not enabled, returns (none)
================================================================================
USAGE
================================================================================

View file

@ -0,0 +1,726 @@
.. SPDX-License-Identifier: GPL-2.0
.. _fsverity:
=======================================================
fs-verity: read-only file-based authenticity protection
=======================================================
Introduction
============
fs-verity (``fs/verity/``) is a support layer that filesystems can
hook into to support transparent integrity and authenticity protection
of read-only files. Currently, it is supported by the ext4 and f2fs
filesystems. Like fscrypt, not too much filesystem-specific code is
needed to support fs-verity.
fs-verity is similar to `dm-verity
<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
but works on files rather than block devices. On regular files on
filesystems supporting fs-verity, userspace can execute an ioctl that
causes the filesystem to build a Merkle tree for the file and persist
it to a filesystem-specific location associated with the file.
After this, the file is made readonly, and all reads from the file are
automatically verified against the file's Merkle tree. Reads of any
corrupted data, including mmap reads, will fail.
Userspace can use another ioctl to retrieve the root hash (actually
the "file measurement", which is a hash that includes the root hash)
that fs-verity is enforcing for the file. This ioctl executes in
constant time, regardless of the file size.
fs-verity is essentially a way to hash a file in constant time,
subject to the caveat that reads which would violate the hash will
fail at runtime.
Use cases
=========
By itself, the base fs-verity feature only provides integrity
protection, i.e. detection of accidental (non-malicious) corruption.
However, because fs-verity makes retrieving the file hash extremely
efficient, it's primarily meant to be used as a tool to support
authentication (detection of malicious modifications) or auditing
(logging file hashes before use).
Trusted userspace code (e.g. operating system code running on a
read-only partition that is itself authenticated by dm-verity) can
authenticate the contents of an fs-verity file by using the
`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
digital signature of it.
A standard file hash could be used instead of fs-verity. However,
this is inefficient if the file is large and only a small portion may
be accessed. This is often the case for Android application package
(APK) files, for example. These typically contain many translations,
classes, and other resources that are infrequently or even never
accessed on a particular device. It would be slow and wasteful to
read and hash the entire file before starting the application.
Unlike an ahead-of-time hash, fs-verity also re-verifies data each
time it's paged in. This ensures that malicious disk firmware can't
undetectably change the contents of the file at runtime.
fs-verity does not replace or obsolete dm-verity. dm-verity should
still be used on read-only filesystems. fs-verity is for files that
must live on a read-write filesystem because they are independently
updated and potentially user-installed, so dm-verity cannot be used.
The base fs-verity feature is a hashing mechanism only; actually
authenticating the files is up to userspace. However, to meet some
users' needs, fs-verity optionally supports a simple signature
verification mechanism where users can configure the kernel to require
that all fs-verity files be signed by a key loaded into a keyring; see
`Built-in signature verification`_. Support for fs-verity file hashes
in IMA (Integrity Measurement Architecture) policies is also planned.
User API
========
FS_IOC_ENABLE_VERITY
--------------------
The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
follows::
struct fsverity_enable_arg {
__u32 version;
__u32 hash_algorithm;
__u32 block_size;
__u32 salt_size;
__u64 salt_ptr;
__u32 sig_size;
__u32 __reserved1;
__u64 sig_ptr;
__u64 __reserved2[11];
};
This structure contains the parameters of the Merkle tree to build for
the file, and optionally contains a signature. It must be initialized
as follows:
- ``version`` must be 1.
- ``hash_algorithm`` must be the identifier for the hash algorithm to
use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
``include/uapi/linux/fsverity.h`` for the list of possible values.
- ``block_size`` must be the Merkle tree block size. Currently, this
must be equal to the system page size, which is usually 4096 bytes.
Other sizes may be supported in the future. This value is not
necessarily the same as the filesystem block size.
- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
provided. The salt is a value that is prepended to every hashed
block; it can be used to personalize the hashing for a particular
file or device. Currently the maximum salt size is 32 bytes.
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
provided.
- ``sig_size`` is the size of the signature in bytes, or 0 if no
signature is provided. Currently the signature is (somewhat
arbitrarily) limited to 16128 bytes. See `Built-in signature
verification`_ for more information.
- ``sig_ptr`` is the pointer to the signature, or NULL if no
signature is provided.
- All reserved fields must be zeroed.
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
the file and persist it to a filesystem-specific location associated
with the file, then mark the file as a verity file. This ioctl may
take a long time to execute on large files, and it is interruptible by
fatal signals.
FS_IOC_ENABLE_VERITY checks for write access to the inode. However,
it must be executed on an O_RDONLY file descriptor and no processes
can have the file open for writing. Attempts to open the file for
writing while this ioctl is executing will fail with ETXTBSY. (This
is necessary to guarantee that no writable file descriptors will exist
after verity is enabled, and to guarantee that the file's contents are
stable while the Merkle tree is being built over it.)
On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
verity file. On failure (including the case of interruption by a
fatal signal), no changes are made to the file.
FS_IOC_ENABLE_VERITY can fail with the following errors:
- ``EACCES``: the process does not have write access to the file
- ``EBADMSG``: the signature is malformed
- ``EBUSY``: this ioctl is already running on the file
- ``EEXIST``: the file already has verity enabled
- ``EFAULT``: the caller provided inaccessible memory
- ``EINTR``: the operation was interrupted by a fatal signal
- ``EINVAL``: unsupported version, hash algorithm, or block size; or
reserved bits are set; or the file descriptor refers to neither a
regular file nor a directory.
- ``EISDIR``: the file descriptor refers to a directory
- ``EKEYREJECTED``: the signature doesn't match the file
- ``EMSGSIZE``: the salt or signature is too long
- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
needed to verify the signature
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
available in the kernel's crypto API as currently configured (e.g.
for SHA-512, missing CONFIG_CRYPTO_SHA512).
- ``ENOTTY``: this type of filesystem does not implement fs-verity
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
support; or the filesystem superblock has not had the 'verity'
feature enabled on it; or the filesystem does not support fs-verity
on this file. (See `Filesystem support`_.)
- ``EPERM``: the file is append-only; or, a signature is required and
one was not provided.
- ``EROFS``: the filesystem is read-only
- ``ETXTBSY``: someone has the file open for writing. This can be the
caller's file descriptor, another open file descriptor, or the file
reference held by a writable memory map.
FS_IOC_MEASURE_VERITY
---------------------
The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
file. The file measurement is a digest that cryptographically
identifies the file contents that are being enforced on reads.
This ioctl takes in a pointer to a variable-length structure::
struct fsverity_digest {
__u16 digest_algorithm;
__u16 digest_size; /* input/output */
__u8 digest[];
};
``digest_size`` is an input/output field. On input, it must be
initialized to the number of bytes allocated for the variable-length
``digest`` field.
On success, 0 is returned and the kernel fills in the structure as
follows:
- ``digest_algorithm`` will be the hash algorithm used for the file
measurement. It will match ``fsverity_enable_arg::hash_algorithm``.
- ``digest_size`` will be the size of the digest in bytes, e.g. 32
for SHA-256. (This can be redundant with ``digest_algorithm``.)
- ``digest`` will be the actual bytes of the digest.
FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
regardless of the size of the file.
FS_IOC_MEASURE_VERITY can fail with the following errors:
- ``EFAULT``: the caller provided inaccessible memory
- ``ENODATA``: the file is not a verity file
- ``ENOTTY``: this type of filesystem does not implement fs-verity
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
support, or the filesystem superblock has not had the 'verity'
feature enabled on it. (See `Filesystem support`_.)
- ``EOVERFLOW``: the digest is longer than the specified
``digest_size`` bytes. Try providing a larger buffer.
FS_IOC_GETFLAGS
---------------
The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
can also be used to check whether a file has fs-verity enabled or not.
To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
The verity flag is not settable via FS_IOC_SETFLAGS. You must use
FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
Accessing verity files
======================
Applications can transparently access a verity file just like a
non-verity one, with the following exceptions:
- Verity files are readonly. They cannot be opened for writing or
truncate()d, even if the file mode bits allow it. Attempts to do
one of these things will fail with EPERM. However, changes to
metadata such as owner, mode, timestamps, and xattrs are still
allowed, since these are not measured by fs-verity. Verity files
can also still be renamed, deleted, and linked to.
- Direct I/O is not supported on verity files. Attempts to use direct
I/O on such files will fall back to buffered I/O.
- DAX (Direct Access) is not supported on verity files, because this
would circumvent the data verification.
- Reads of data that doesn't match the verity Merkle tree will fail
with EIO (for read()) or SIGBUS (for mmap() reads).
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
file's verity measurement is not signed by a key in the fs-verity
keyring, then opening the file will fail. See `Built-in signature
verification`_.
Direct access to the Merkle tree is not supported. Therefore, if a
verity file is copied, or is backed up and restored, then it will lose
its "verity"-ness. fs-verity is primarily meant for files like
executables that are managed by a package manager.
File measurement computation
============================
This section describes how fs-verity hashes the file contents using a
Merkle tree to produce the "file measurement" which cryptographically
identifies the file contents. This algorithm is the same for all
filesystems that support fs-verity.
Userspace only needs to be aware of this algorithm if it needs to
compute the file measurement itself, e.g. in order to sign the file.
.. _fsverity_merkle_tree:
Merkle tree
-----------
The file contents are divided into blocks, where the block size is
configurable but is usually 4096 bytes. The end of the last block is
zero-padded if needed. Each block is then hashed, producing the first
level of hashes. Then, the hashes in this first level are grouped
into 'blocksize'-byte blocks (zero-padding the ends as needed) and
these blocks are hashed, producing the second level of hashes. This
proceeds up the tree until only a single block remains. The hash of
this block is the "Merkle tree root hash".
If the file fits in one block and is nonempty, then the "Merkle tree
root hash" is simply the hash of the single data block. If the file
is empty, then the "Merkle tree root hash" is all zeroes.
The "blocks" here are not necessarily the same as "filesystem blocks".
If a salt was specified, then it's zero-padded to the closest multiple
of the input size of the hash algorithm's compression function, e.g.
64 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is
prepended to every data or Merkle tree block that is hashed.
The purpose of the block padding is to cause every hash to be taken
over the same amount of data, which simplifies the implementation and
keeps open more possibilities for hardware acceleration. The purpose
of the salt padding is to make the salting "free" when the salted hash
state is precomputed, then imported for each hash.
Example: in the recommended configuration of SHA-256 and 4K blocks,
128 hash values fit in each block. Thus, each level of the Merkle
tree is approximately 128 times smaller than the previous, and for
large files the Merkle tree's size converges to approximately 1/127 of
the original file size. However, for small files, the padding is
significant, making the space overhead proportionally more.
.. _fsverity_descriptor:
fs-verity descriptor
--------------------
By itself, the Merkle tree root hash is ambiguous. For example, it
can't distinguish a large file from a small second file whose data
is exactly the top-level hash block of the first file. Ambiguities
also arise from the convention of padding to the next block boundary.
To solve this problem, the verity file measurement is actually
computed as a hash of the following structure, which contains the
Merkle tree root hash as well as other fields such as the file size::
struct fsverity_descriptor {
__u8 version; /* must be 1 */
__u8 hash_algorithm; /* Merkle tree hash algorithm */
__u8 log_blocksize; /* log2 of size of data and tree blocks */
__u8 salt_size; /* size of salt in bytes; 0 if none */
__le32 sig_size; /* must be 0 */
__le64 data_size; /* size of file the Merkle tree is built over */
__u8 root_hash[64]; /* Merkle tree root hash */
__u8 salt[32]; /* salt prepended to each hashed block */
__u8 __reserved[144]; /* must be 0's */
};
Note that the ``sig_size`` field must be set to 0 for the purpose of
computing the file measurement, even if a signature was provided (or
will be provided) to `FS_IOC_ENABLE_VERITY`_.
Built-in signature verification
===============================
With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
a portion of an authentication policy (see `Use cases`_) in the
kernel. Specifically, it adds support for:
1. At fs-verity module initialization time, a keyring ".fs-verity" is
created. The root user can add trusted X.509 certificates to this
keyring using the add_key() system call, then (when done)
optionally use keyctl_restrict_keyring() to prevent additional
certificates from being added.
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
detached signature in DER format of the file measurement. On
success, this signature is persisted alongside the Merkle tree.
Then, any time the file is opened, the kernel will verify the
file's actual measurement against this signature, using the
certificates in the ".fs-verity" keyring.
3. A new sysctl "fs.verity.require_signatures" is made available.
When set to 1, the kernel requires that all verity files have a
correctly signed file measurement as described in (2).
File measurements must be signed in the following format, which is
similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
struct fsverity_signed_digest {
char magic[8]; /* must be "FSVerity" */
__le16 digest_algorithm;
__le16 digest_size;
__u8 digest[];
};
fs-verity's built-in signature verification support is meant as a
relatively simple mechanism that can be used to provide some level of
authenticity protection for verity files, as an alternative to doing
the signature verification in userspace or using IMA-appraisal.
However, with this mechanism, userspace programs still need to check
that the verity bit is set, and there is no protection against verity
files being swapped around.
Filesystem support
==================
fs-verity is currently supported by the ext4 and f2fs filesystems.
The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
on either filesystem.
``include/linux/fsverity.h`` declares the interface between the
``fs/verity/`` support layer and filesystems. Briefly, filesystems
must provide an ``fsverity_operations`` structure that provides
methods to read and write the verity metadata to a filesystem-specific
location, including the Merkle tree blocks and
``fsverity_descriptor``. Filesystems must also call functions in
``fs/verity/`` at certain times, such as when a file is opened or when
pages have been read into the pagecache. (See `Verifying data`_.)
ext4
----
ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
To create verity files on an ext4 filesystem, the filesystem must have
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
it. "verity" is an RO_COMPAT filesystem feature, so once set, old
kernels will only be able to mount the filesystem readonly, and old
versions of e2fsck will be unable to check the filesystem. Moreover,
currently ext4 only supports mounting a filesystem with the "verity"
feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
ext4 also supports encryption, which can be used simultaneously with
fs-verity. In this case, the plaintext data is verified rather than
the ciphertext. This is necessary in order to make the file
measurement meaningful, since every file is encrypted differently.
ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
past the end of the file, starting at the first 64K boundary beyond
i_size. This approach works because (a) verity files are readonly,
and (b) pages fully beyond i_size aren't visible to userspace but can
be read/written internally by ext4 with only some relatively small
changes to ext4. This approach avoids having to depend on the
EA_INODE feature and on rearchitecting ext4's xattr support to
support paging multi-gigabyte xattrs into memory, and to support
encrypting xattrs. Note that the verity metadata *must* be encrypted
when the file is, since it contains hashes of the plaintext data.
Currently, ext4 verity only supports the case where the Merkle tree
block size, filesystem block size, and page size are all the same. It
also only supports extent-based files.
f2fs
----
f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
To create verity files on an f2fs filesystem, the filesystem must have
been formatted with ``-O verity``.
f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
cleared.
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first
64K boundary beyond i_size. See explanation for ext4 above.
Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
which wouldn't be enough for even a single Merkle tree block.
Currently, f2fs verity only supports a Merkle tree block size of 4096.
Also, f2fs doesn't support enabling verity on files that currently
have atomic or volatile writes pending.
Implementation details
======================
Verifying data
--------------
fs-verity ensures that all reads of a verity file's data are verified,
regardless of which syscall is used to do the read (e.g. mmap(),
read(), pread()) and regardless of whether it's the first read or a
later read (unless the later read can return cached data that was
already verified). Below, we describe how filesystems implement this.
Pagecache
~~~~~~~~~
For filesystems using Linux's pagecache, the ``->readpage()`` and
``->readpages()`` methods must be modified to verify pages before they
are marked Uptodate. Merely hooking ``->read_iter()`` would be
insufficient, since ``->read_iter()`` is not used for memory maps.
Therefore, fs/verity/ provides a function fsverity_verify_page() which
verifies a page that has been read into the pagecache of a verity
inode, but is still locked and not Uptodate, so it's not yet readable
by userspace. As needed to do the verification,
fsverity_verify_page() will call back into the filesystem to read
Merkle tree pages via fsverity_operations::read_merkle_tree_page().
fsverity_verify_page() returns false if verification failed; in this
case, the filesystem must not set the page Uptodate. Following this,
as per the usual Linux pagecache behavior, attempts by userspace to
read() from the part of the file containing the page will fail with
EIO, and accesses to the page within a memory map will raise SIGBUS.
fsverity_verify_page() currently only supports the case where the
Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
In principle, fsverity_verify_page() verifies the entire path in the
Merkle tree from the data page to the root hash. However, for
efficiency the filesystem may cache the hash pages. Therefore,
fsverity_verify_page() only ascends the tree reading hash pages until
an already-verified hash page is seen, as indicated by the PageChecked
bit being set. It then verifies the path to that page.
This optimization, which is also used by dm-verity, results in
excellent sequential read performance. This is because usually (e.g.
127 in 128 times for 4K blocks and SHA-256) the hash page from the
bottom level of the tree will already be cached and checked from
reading a previous data page. However, random reads perform worse.
Block device based filesystems
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
the pagecache, so the above subsection applies too. However, they
also usually read many pages from a file at once, grouped into a
structure called a "bio". To make it easier for these types of
filesystems to support fs-verity, fs/verity/ also provides a function
fsverity_verify_bio() which verifies all pages in a bio.
ext4 and f2fs also support encryption. If a verity file is also
encrypted, the pages must be decrypted before being verified. To
support this, these filesystems allocate a "post-read context" for
each bio and store it in ``->bi_private``::
struct bio_post_read_ctx {
struct bio *bio;
struct work_struct work;
unsigned int cur_step;
unsigned int enabled_steps;
};
``enabled_steps`` is a bitmask that specifies whether decryption,
verity, or both is enabled. After the bio completes, for each needed
postprocessing step the filesystem enqueues the bio_post_read_ctx on a
workqueue, and then the workqueue work does the decryption or
verification. Finally, pages where no decryption or verity error
occurred are marked Uptodate, and the pages are unlocked.
Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
simply zeroes holes and sets the corresponding pages Uptodate; no bios
are issued. To prevent this case from bypassing fs-verity, these
filesystems use fsverity_verify_page() to verify hole pages.
ext4 and f2fs disable direct I/O on verity files, since otherwise
direct I/O would bypass fs-verity. (They also do the same for
encrypted files.)
Userspace utility
=================
This document focuses on the kernel, but a userspace utility for
fs-verity can be found at:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
See the README.md file in the fsverity-utils source tree for details,
including examples of setting up fs-verity protected files.
Tests
=====
To test fs-verity, use xfstests. For example, using `kvm-xfstests
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
kvm-xfstests -c ext4,f2fs -g verity
FAQ
===
This section answers frequently asked questions about fs-verity that
weren't already directly answered in other parts of this document.
:Q: Why isn't fs-verity part of IMA?
:A: fs-verity and IMA (Integrity Measurement Architecture) have
different focuses. fs-verity is a filesystem-level mechanism for
hashing individual files using a Merkle tree. In contrast, IMA
specifies a system-wide policy that specifies which files are
hashed and what to do with those hashes, such as log them,
authenticate them, or add them to a measurement list.
IMA is planned to support the fs-verity hashing mechanism as an
alternative to doing full file hashes, for people who want the
performance and security benefits of the Merkle tree based hash.
But it doesn't make sense to force all uses of fs-verity to be
through IMA. As a standalone filesystem feature, fs-verity
already meets many users' needs, and it's testable like other
filesystem features e.g. with xfstests.
:Q: Isn't fs-verity useless because the attacker can just modify the
hashes in the Merkle tree, which is stored on-disk?
:A: To verify the authenticity of an fs-verity file you must verify
the authenticity of the "file measurement", which is basically the
root hash of the Merkle tree. See `Use cases`_.
:Q: Isn't fs-verity useless because the attacker can just replace a
verity file with a non-verity one?
:A: See `Use cases`_. In the initial use case, it's really trusted
userspace code that authenticates the files; fs-verity is just a
tool to do this job efficiently and securely. The trusted
userspace code will consider non-verity files to be inauthentic.
:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you
store just the root hash?
:A: If the Merkle tree wasn't stored on-disk, then you'd have to
compute the entire tree when the file is first accessed, even if
just one byte is being read. This is a fundamental consequence of
how Merkle tree hashing works. To verify a leaf node, you need to
verify the whole path to the root hash, including the root node
(the thing which the root hash is a hash of). But if the root
node isn't stored on-disk, you have to compute it by hashing its
children, and so on until you've actually hashed the entire file.
That defeats most of the point of doing a Merkle tree-based hash,
since if you have to hash the whole file ahead of time anyway,
then you could simply do sha256(file) instead. That would be much
simpler, and a bit faster too.
It's true that an in-memory Merkle tree could still provide the
advantage of verification on every read rather than just on the
first read. However, it would be inefficient because every time a
hash page gets evicted (you can't pin the entire Merkle tree into
memory, since it may be very large), in order to restore it you
again need to hash everything below it in the tree. This again
defeats most of the point of doing a Merkle tree-based hash, since
a single block read could trigger re-hashing gigabytes of data.
:Q: But couldn't you store just the leaf nodes and compute the rest?
:A: See previous answer; this really just moves up one level, since
one could alternatively interpret the data blocks as being the
leaf nodes of the Merkle tree. It's true that the tree can be
computed much faster if the leaf level is stored rather than just
the data, but that's only because each level is less than 1% the
size of the level below (assuming the recommended settings of
SHA-256 and 4K blocks). For the exact same reason, by storing
"just the leaf nodes" you'd already be storing over 99% of the
tree, so you might as well simply store the whole tree.
:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
part of a package that is installed to many computers?
:A: This isn't currently supported. It was part of the original
design, but was removed to simplify the kernel UAPI and because it
wasn't a critical use case. Files are usually installed once and
used many times, and cryptographic hashing is somewhat fast on
most modern processors.
:Q: Why doesn't fs-verity support writes?
:A: Write support would be very difficult and would require a
completely different design, so it's well outside the scope of
fs-verity. Write support would require:
- A way to maintain consistency between the data and hashes,
including all levels of hashes, since corruption after a crash
(especially of potentially the entire file!) is unacceptable.
The main options for solving this are data journalling,
copy-on-write, and log-structured volume. But it's very hard to
retrofit existing filesystems with new consistency mechanisms.
Data journalling is available on ext4, but is very slow.
- Rebuilding the Merkle tree after every write, which would be
extremely inefficient. Alternatively, a different authenticated
dictionary structure such as an "authenticated skiplist" could
be used. However, this would be far more complex.
Compare it to dm-verity vs. dm-integrity. dm-verity is very
simple: the kernel just verifies read-only data against a
read-only Merkle tree. In contrast, dm-integrity supports writes
but is slow, is much more complex, and doesn't actually support
full-device authentication since it authenticates each sector
independently, i.e. there is no "root hash". It doesn't really
make sense for the same device-mapper target to support these two
very different cases; the same applies to fs-verity.
:Q: Since verity files are immutable, why isn't the immutable bit set?
:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
specific set of semantics which not only make the file contents
read-only, but also prevent the file from being deleted, renamed,
linked to, or having its owner or mode changed. These extra
properties are unwanted for fs-verity, so reusing the immutable
bit isn't appropriate.
:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
:A: Abusing the xattr interface for basically arbitrary syscalls is
heavily frowned upon by most of the Linux filesystem developers.
An xattr should really just be an xattr on-disk, not an API to
e.g. magically trigger construction of a Merkle tree.
:Q: Does fs-verity support remote filesystems?
:A: Only ext4 and f2fs support is implemented currently, but in
principle any filesystem that can store per-file verity metadata
can support fs-verity, regardless of whether it's local or remote.
Some filesystems may have fewer options of where to store the
verity metadata; one possibility is to store it past the end of
the file and "hide" it from userspace by manipulating i_size. The
data verification functions provided by ``fs/verity/`` also assume
that the filesystem uses the Linux pagecache, but both local and
remote filesystems normally do so.
:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity
be implemented entirely at the VFS level?
:A: There are many reasons why this is not possible or would be very
difficult, including the following:
- To prevent bypassing verification, pages must not be marked
Uptodate until they've been verified. Currently, each
filesystem is responsible for marking pages Uptodate via
``->readpages()``. Therefore, currently it's not possible for
the VFS to do the verification on its own. Changing this would
require significant changes to the VFS and all filesystems.
- It would require defining a filesystem-independent way to store
the verity metadata. Extended attributes don't work for this
because (a) the Merkle tree may be gigabytes, but many
filesystems assume that all xattrs fit into a single 4K
filesystem block, and (b) ext4 and f2fs encryption doesn't
encrypt xattrs, yet the Merkle tree *must* be encrypted when the
file contents are, because it stores hashes of the plaintext
file contents.
So the verity metadata would have to be stored in an actual
file. Using a separate file would be very ugly, since the
metadata is fundamentally part of the file to be protected, and
it could cause problems where users could delete the real file
but not the metadata file or vice versa. On the other hand,
having it be in the same file would break applications unless
filesystems' notion of i_size were divorced from the VFS's,
which would be complex and require changes to all filesystems.
- It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
transaction mechanism so that either the file ends up with
verity enabled, or no changes were made. Allowing intermediate
states to occur after a crash may cause problems.

View file

@ -359,3 +359,4 @@ encryption of files and directories.
:maxdepth: 2
fscrypt
fsverity

View file

@ -889,6 +889,7 @@ Writeback: 0 kB
AnonPages: 861800 kB
Mapped: 280372 kB
Shmem: 644 kB
KReclaimable: 168048 kB
Slab: 284364 kB
SReclaimable: 159856 kB
SUnreclaim: 124508 kB
@ -956,6 +957,9 @@ AnonHugePages: Non-file backed huge pages mapped into userspace page tables
ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated
with huge pages
ShmemPmdMapped: Shared memory mapped into userspace with huge pages
KReclaimable: Kernel allocations that the kernel will attempt to reclaim
under memory pressure. Includes SReclaimable (below), and other
direct allocations with a shrinker.
Slab: in-kernel data structures cache
SReclaimable: Part of Slab, that might be reclaimed, such as caches
SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure

View file

@ -6031,6 +6031,18 @@ S: Maintained
F: fs/notify/
F: include/linux/fsnotify*.h
FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
M: Eric Biggers <ebiggers@kernel.org>
M: Theodore Y. Ts'o <tytso@mit.edu>
L: linux-fscrypt@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
S: Supported
F: fs/verity/
F: include/linux/fsverity.h
F: include/uapi/linux/fsverity.h
F: Documentation/filesystems/fsverity.rst
FUJITSU LAPTOP EXTRAS
M: Jonathan Woithe <jwoithe@just42.net>
L: platform-driver-x86@vger.kernel.org

View file

@ -839,10 +839,24 @@ LDFLAGS_vmlinux += --gc-sections
endif
ifdef CONFIG_LTO_CLANG
lto-clang-flags := -flto -fvisibility=hidden
ifdef CONFIG_THINLTO
lto-clang-flags := -flto=thin
KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache
else
lto-clang-flags := -flto
endif
lto-clang-flags += -fvisibility=default $(call cc-option, -fsplit-lto-unit)
# Limit inlining across translation units to reduce binary size
LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5
KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG)
KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG)
KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds
# allow disabling only clang LTO where needed
DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
DISABLE_LTO_CLANG := -fno-lto
export DISABLE_LTO_CLANG
endif
@ -855,7 +869,7 @@ export LTO_CFLAGS DISABLE_LTO
endif
ifdef CONFIG_CFI_CLANG
cfi-clang-flags += -fsanitize=cfi $(call cc-option, -fsplit-lto-unit)
cfi-clang-flags += -fsanitize=cfi -fno-sanitize-cfi-canonical-jump-tables
DISABLE_CFI_CLANG := -fno-sanitize=cfi
ifdef CONFIG_MODULES
cfi-clang-flags += -fsanitize-cfi-cross-dso
@ -880,6 +894,12 @@ DISABLE_LTO += $(DISABLE_CFI)
export CFI_CFLAGS DISABLE_CFI
endif
ifdef CONFIG_SHADOW_CALL_STACK
CC_FLAGS_SCS := -fsanitize=shadow-call-stack
KBUILD_CFLAGS += $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

File diff suppressed because it is too large Load diff

857
abi_gki_aarch64_whitelist Normal file
View file

@ -0,0 +1,857 @@
[abi_whitelist]
add_timer
add_uevent_var
add_wait_queue
alloc_chrdev_region
__alloc_disk_node
alloc_etherdev_mqs
alloc_netdev_mqs
alloc_pages_exact
__alloc_pages_nodemask
__alloc_percpu
__alloc_skb
__alloc_workqueue_key
arch_bpf_jit_check_func
__arch_copy_from_user
__arch_copy_to_user
arm64_const_caps_ready
autoremove_wake_function
bcmp
blk_cleanup_queue
blk_execute_rq
blk_get_queue
blk_get_request
blk_mq_alloc_tag_set
blk_mq_complete_request
__blk_mq_end_request
blk_mq_end_request
blk_mq_free_tag_set
blk_mq_init_queue
blk_mq_quiesce_queue
blk_mq_requeue_request
blk_mq_run_hw_queues
blk_mq_start_request
blk_mq_start_stopped_hw_queues
blk_mq_stop_hw_queue
blk_mq_unquiesce_queue
blk_mq_virtio_map_queues
blk_put_queue
blk_put_request
blk_queue_alignment_offset
blk_queue_bounce_limit
blk_queue_flag_clear
blk_queue_flag_set
blk_queue_io_min
blk_queue_io_opt
blk_queue_logical_block_size
blk_queue_max_discard_sectors
blk_queue_max_hw_sectors
blk_queue_max_segments
blk_queue_max_segment_size
blk_queue_physical_block_size
blk_queue_rq_timeout
blk_queue_write_cache
blk_rq_map_kern
blk_rq_map_sg
blk_status_to_errno
blk_update_request
bpf_prog_add
bpf_prog_put
bpf_prog_sub
bpf_trace_run10
bpf_trace_run2
bpf_trace_run8
bpf_warn_invalid_xdp_action
build_skb
bus_register
bus_unregister
call_netdevice_notifiers
call_rcu
cancel_delayed_work
cancel_delayed_work_sync
cancel_work_sync
capable
cdev_add
cdev_alloc
cdev_del
cdev_device_add
cdev_device_del
cdev_init
cfg80211_connect_done
cfg80211_disconnected
cfg80211_inform_bss_data
cfg80211_put_bss
cfg80211_scan_done
__cfi_slowpath
check_disk_change
__check_object_size
__class_create
class_destroy
__class_register
class_unregister
clear_page
clk_disable
clk_enable
clk_get_rate
clk_prepare
clk_unprepare
complete
complete_all
completion_done
console_suspend_enabled
__const_udelay
consume_skb
_copy_from_iter_full
copy_page
_copy_to_iter
cpu_bit_bitmap
cpufreq_generic_attr
cpufreq_register_driver
cpufreq_unregister_driver
__cpuhp_remove_state
__cpuhp_setup_state
__cpuhp_state_add_instance
__cpuhp_state_remove_instance
cpu_hwcap_keys
cpu_hwcaps
cpumask_next
cpumask_next_wrap
cpu_number
__cpu_online_mask
cpus_read_lock
cpus_read_unlock
cpu_topology
crypto_ablkcipher_type
crypto_dequeue_request
crypto_enqueue_request
crypto_init_queue
crypto_register_alg
crypto_unregister_alg
datagram_poll
debugfs_create_dir
debugfs_create_file
debugfs_create_x32
debugfs_remove
debugfs_remove_recursive
debug_smp_processor_id
default_llseek
default_wake_function
delayed_work_timer_fn
del_gendisk
del_timer
del_timer_sync
destroy_workqueue
dev_add_pack
dev_close
dev_driver_string
_dev_err
dev_fwnode
__dev_get_by_index
dev_get_by_index
dev_get_by_index_rcu
dev_get_stats
device_add
device_add_disk
device_create
device_create_file
device_del
device_destroy
device_for_each_child
device_initialize
device_property_present
device_property_read_u32_array
device_register
device_remove_file
device_unregister
_dev_info
__dev_kfree_skb_any
devm_clk_get
dev_mc_sync_multiple
dev_mc_unsync
devm_gpiod_get_index
devm_gpio_request_one
devm_ioremap
devm_ioremap_resource
devm_kfree
devm_kmalloc
devm_regulator_get_optional
__devm_request_region
devm_request_threaded_irq
__devm_reset_control_get
devm_rtc_allocate_device
_dev_notice
dev_open
dev_pm_domain_attach
dev_pm_domain_detach
dev_printk
dev_queue_xmit
dev_remove_pack
devres_add
__devres_alloc_node
devres_destroy
devres_free
dev_set_mtu
dev_set_name
dev_uc_sync_multiple
dev_uc_unsync
_dev_warn
disable_irq
dma_alloc_from_dev_coherent
dma_buf_export
dma_buf_fd
dma_buf_get
dma_buf_put
dma_fence_context_alloc
dma_fence_enable_sw_signaling
dma_fence_init
dma_fence_match_context
dma_fence_release
dma_fence_signal
dma_fence_signal_locked
dma_fence_wait_timeout
dma_release_from_dev_coherent
driver_register
driver_unregister
drm_add_edid_modes
drm_add_modes_noedid
drm_atomic_helper_check
drm_atomic_helper_cleanup_planes
drm_atomic_helper_commit
drm_atomic_helper_commit_hw_done
drm_atomic_helper_commit_modeset_disables
drm_atomic_helper_commit_modeset_enables
drm_atomic_helper_commit_planes
drm_atomic_helper_connector_destroy_state
drm_atomic_helper_connector_duplicate_state
drm_atomic_helper_connector_reset
drm_atomic_helper_crtc_destroy_state
drm_atomic_helper_crtc_duplicate_state
drm_atomic_helper_crtc_reset
drm_atomic_helper_disable_plane
drm_atomic_helper_page_flip
drm_atomic_helper_plane_destroy_state
drm_atomic_helper_plane_duplicate_state
drm_atomic_helper_plane_reset
drm_atomic_helper_set_config
drm_atomic_helper_shutdown
drm_atomic_helper_update_plane
drm_atomic_helper_wait_for_vblanks
drm_class_device_register
drm_class_device_unregister
drm_clflush_pages
drm_compat_ioctl
drm_connector_attach_edid_property
drm_connector_attach_encoder
drm_connector_cleanup
drm_connector_init
drm_connector_register
drm_connector_unregister
drm_connector_update_edid_property
drm_crtc_cleanup
drm_crtc_init_with_planes
drm_crtc_send_vblank_event
drm_cvt_mode
drm_dbg
drm_debugfs_create_files
drm_dev_alloc
drm_dev_put
drm_dev_register
drm_dev_set_unique
drm_do_get_edid
drm_encoder_cleanup
drm_encoder_init
drm_err
drm_framebuffer_init
drm_gem_fb_create_handle
drm_gem_fb_destroy
drm_gem_handle_create
drm_gem_object_init
drm_gem_object_lookup
drm_gem_object_put_unlocked
drm_gem_object_release
drm_gem_prime_export
drm_gem_prime_fd_to_handle
drm_gem_prime_handle_to_fd
drm_gem_prime_import
drm_gem_prime_mmap
drm_global_item_ref
drm_global_item_unref
drm_helper_hpd_irq_event
drm_helper_mode_fill_fb_struct
drm_helper_probe_single_connector_modes
drm_ht_create
drm_ht_find_item
drm_ht_insert_item
drm_ht_just_insert_please
drm_ht_remove
drm_ht_remove_item
drm_ioctl
drm_kms_helper_hotplug_event
drm_mm_init
drm_mm_insert_node_in_range
drm_mm_print
drm_mm_remove_node
drm_mm_takedown
drm_mode_config_cleanup
drm_mode_config_init
drm_mode_config_reset
drm_mode_probed_add
drm_open
drm_plane_cleanup
drm_poll
drm_prime_pages_to_sg
__drm_printfn_debug
drm_put_dev
drm_read
drm_release
drm_set_preferred_mode
drm_universal_plane_init
drm_vma_offset_add
drm_vma_offset_lookup_locked
drm_vma_offset_manager_destroy
drm_vma_offset_manager_init
drm_vma_offset_remove
dummy_dma_ops
eth_commit_mac_addr_change
ether_setup
eth_prepare_mac_addr_change
__ethtool_get_link_ksettings
ethtool_op_get_link
ethtool_op_get_ts_info
eth_type_trans
eth_validate_addr
event_triggers_call
fasync_helper
fd_install
finish_wait
flow_keys_basic_dissector
flush_work
flush_workqueue
fput
free_irq
free_netdev
__free_pages
free_pages_exact
free_percpu
freezing_slow_path
fsl8250_handle_irq
generic_file_llseek
get_device
get_random_bytes
__get_task_comm
get_unused_fd_flags
gpiod_cansleep
gpiod_get_raw_value
gpiod_get_raw_value_cansleep
gpiod_get_value
gpiod_get_value_cansleep
gpiod_is_active_low
gpiod_set_debounce
gpiod_to_irq
gpio_to_desc
hrtimer_active
hrtimer_cancel
hrtimer_forward
hrtimer_init
hrtimer_start_range_ns
hvc_alloc
hvc_instantiate
hvc_kick
hvc_poll
hvc_remove
__hvc_resize
hwrng_register
hwrng_unregister
ida_alloc_range
ida_destroy
ida_free
init_net
init_timer_key
init_wait_entry
__init_waitqueue_head
input_alloc_absinfo
input_allocate_device
input_event
input_free_device
input_mt_init_slots
input_register_device
input_set_abs_params
input_unregister_device
iomem_resource
__ioremap
__iounmap
irq_dispose_mapping
irq_set_affinity_hint
irq_set_irq_wake
jiffies
jiffies_to_msecs
kernel_kobj
kfree
kfree_call_rcu
kfree_skb
kill_fasync
kimage_voffset
__kmalloc
kmalloc_caches
kmalloc_order_trace
kmem_cache_alloc
kmem_cache_alloc_trace
kmem_cache_create
kmem_cache_destroy
kmem_cache_free
kmemdup
kobject_del
kobject_init_and_add
kobject_put
kobject_uevent
kobject_uevent_env
kstrtoull
kthread_create_on_node
kthread_create_worker
kthread_destroy_worker
kthread_queue_work
kthread_should_stop
kthread_stop
ktime_get
ktime_get_real_seconds
ktime_get_ts64
ktime_get_with_offset
kvfree
kvmalloc_node
kzfree
led_classdev_unregister
led_trigger_event
led_trigger_register_simple
led_trigger_unregister_simple
__ll_sc_atomic64_add
__ll_sc_atomic64_add_return_relaxed
__ll_sc_atomic64_andnot
__ll_sc_atomic64_fetch_or
__ll_sc_atomic64_or
__ll_sc_atomic_add
__ll_sc_atomic_add_return
__ll_sc_atomic_sub
__ll_sc_atomic_sub_return
__ll_sc___cmpxchg_case_mb_4
__ll_sc___cmpxchg_case_mb_8
__local_bh_enable_ip
lock_sock_nested
mark_page_accessed
memcpy
__memcpy_fromio
__memcpy_toio
memdup_user
memmove
memparse
memset
__memset_io
memstart_addr
memzero_explicit
misc_deregister
misc_register
mod_timer
__module_get
module_put
__msecs_to_jiffies
msleep
__mutex_init
mutex_lock
mutex_lock_interruptible
mutex_trylock
mutex_unlock
__napi_alloc_skb
napi_complete_done
napi_consume_skb
napi_disable
napi_gro_receive
napi_hash_del
__napi_schedule
napi_schedule_prep
__netdev_alloc_skb
netdev_change_features
netdev_err
netdev_increment_features
netdev_info
netdev_lower_state_changed
netdev_master_upper_dev_link
netdev_notify_peers
netdev_rx_handler_register
netdev_rx_handler_unregister
netdev_upper_dev_link
netdev_upper_dev_unlink
netdev_warn
netif_carrier_off
netif_carrier_on
netif_device_attach
netif_device_detach
netif_napi_add
netif_napi_del
netif_receive_skb
netif_rx
netif_rx_ni
netif_schedule_queue
netif_set_real_num_rx_queues
netif_set_real_num_tx_queues
__netif_set_xps_queue
netif_stacked_transfer_operstate
netif_tx_stop_all_queues
netif_tx_wake_queue
netlink_capable
__netlink_dump_start
net_ratelimit
nf_conntrack_destroy
nla_memcpy
nla_parse
nla_put
__nlmsg_put
no_llseek
nonseekable_open
noop_llseek
nr_cpu_ids
nr_swap_pages
nsecs_to_jiffies
of_address_to_resource
of_alias_get_id
of_device_is_big_endian
of_device_is_compatible
of_find_property
of_get_child_by_name
of_get_next_child
of_get_property
of_irq_get
of_led_classdev_register
of_match_device
of_parse_phandle
of_property_read_u64
of_property_read_variable_u32_array
panic
param_ops_bool
param_ops_int
param_ops_uint
passthru_features_check
pci_alloc_irq_vectors_affinity
pci_bus_type
pci_disable_device
pci_enable_device
pci_find_capability
pci_find_ext_capability
pci_find_next_capability
pci_free_irq_vectors
pci_iomap_range
pci_irq_get_affinity
pci_irq_vector
pci_read_config_byte
pci_read_config_dword
__pci_register_driver
pci_release_selected_regions
pci_request_selected_regions
pci_set_master
pci_unregister_driver
PDE_DATA
__per_cpu_offset
perf_trace_buf_alloc
perf_trace_run_bpf_submit
pipe_lock
pipe_unlock
platform_device_add
platform_device_add_data
platform_device_alloc
platform_device_del
platform_device_put
platform_device_register_full
platform_device_unregister
__platform_driver_register
platform_driver_unregister
platform_get_irq
platform_get_resource
pm_generic_resume
pm_generic_runtime_resume
pm_generic_runtime_suspend
pm_generic_suspend
__pm_runtime_disable
pm_runtime_enable
__pm_runtime_idle
__pm_runtime_resume
pm_runtime_set_autosuspend_delay
__pm_runtime_set_status
__pm_runtime_suspend
__pm_runtime_use_autosuspend
pm_wakeup_dev_event
prandom_u32
preempt_count_add
preempt_count_sub
preempt_schedule
preempt_schedule_notrace
prepare_to_wait
prepare_to_wait_event
printk
proc_create_net_single
proc_mkdir_data
proto_register
proto_unregister
__put_cred
put_device
put_disk
__put_page
put_unused_fd
queue_delayed_work_on
queue_work_on
___ratelimit
_raw_read_lock
_raw_read_unlock
_raw_spin_lock
_raw_spin_lock_bh
_raw_spin_lock_irq
_raw_spin_lock_irqsave
_raw_spin_trylock
_raw_spin_unlock
_raw_spin_unlock_bh
_raw_spin_unlock_irq
_raw_spin_unlock_irqrestore
_raw_write_lock_bh
_raw_write_unlock_bh
rcu_barrier
__rcu_read_lock
__rcu_read_unlock
refcount_dec_and_test_checked
refcount_inc_checked
refcount_inc_not_zero_checked
__refrigerator
register_blkdev
__register_chrdev
register_netdev
register_netdevice
register_netdevice_notifier
register_pernet_subsys
register_pm_notifier
register_shrinker
regulator_count_voltages
regulator_disable
regulator_enable
regulator_get_current_limit
regulator_get_voltage
regulator_is_supported_voltage
regulator_list_voltage
regulator_set_voltage
release_sock
remove_conflicting_framebuffers
remove_proc_entry
remove_wait_queue
__request_module
request_threaded_irq
reservation_object_add_excl_fence
reservation_object_add_shared_fence
reservation_object_copy_fences
reservation_object_reserve_shared
reservation_object_test_signaled_rcu
reservation_object_wait_timeout_rcu
reservation_ww_class
reset_control_assert
reset_control_deassert
revalidate_disk
round_jiffies
__rtc_register_device
rtc_time64_to_tm
rtc_tm_to_time64
rtc_update_irq
rtnl_is_locked
rtnl_link_register
rtnl_link_unregister
rtnl_lock
rtnl_register_module
rtnl_unlock
rtnl_unregister
rtnl_unregister_all
sched_setscheduler
schedule
schedule_timeout
scnprintf
security_sock_graft
send_sig
seq_lseek
seq_printf
seq_putc
seq_puts
seq_read
serial8250_get_port
serial8250_register_8250_port
serial8250_resume_port
serial8250_suspend_port
serial8250_unregister_port
set_disk_ro
set_page_dirty
sg_alloc_table
sg_alloc_table_from_pages
sg_copy_from_buffer
sg_copy_to_buffer
sg_free_table
sg_init_one
sg_init_table
sg_miter_next
sg_miter_start
sg_miter_stop
sg_nents
sg_nents_for_len
sg_next
shmem_file_setup
shmem_read_mapping_page_gfp
si_mem_available
si_meminfo
simple_attr_open
simple_attr_read
simple_attr_release
simple_attr_write
simple_read_from_buffer
simple_strtoul
single_open
single_release
sk_alloc
skb_add_rx_frag
skb_clone
skb_coalesce_rx_frag
skb_copy
skb_dequeue
__skb_flow_dissect
skb_free_datagram
skb_page_frag_refill
skb_partial_csum_set
skb_put
skb_queue_purge
skb_queue_tail
skb_recv_datagram
skb_to_sgvec
skb_trim
skb_tstamp_tx
sk_free
snprintf
sock_alloc_send_skb
sock_diag_register
sock_diag_save_cookie
sock_diag_unregister
sock_efree
sock_get_timestamp
sock_i_ino
sock_init_data
sock_no_accept
sock_no_bind
sock_no_connect
sock_no_getname
sock_no_getsockopt
sock_no_ioctl
sock_no_listen
sock_no_mmap
sock_no_sendpage
sock_no_setsockopt
sock_no_shutdown
sock_no_socketpair
sock_queue_rcv_skb
__sock_recv_ts_and_drops
sock_register
__sock_tx_timestamp
sock_unregister
__splice_from_pipe
sprintf
sscanf
__stack_chk_fail
__stack_chk_guard
strcmp
strcpy
string_get_size
strlcpy
strlen
strncmp
strncpy
strstr
__sw_hweight64
swiotlb_max_segment
sync_file_create
sync_file_get_fence
synchronize_hardirq
synchronize_irq
synchronize_net
synchronize_rcu
sysfs_create_bin_file
sysfs_create_group
__sysfs_match_string
sysfs_remove_bin_file
sysfs_remove_group
system_freezable_wq
system_freezing_cnt
system_wq
__tasklet_hi_schedule
tasklet_init
tasklet_kill
__tasklet_schedule
trace_define_field
trace_event_buffer_commit
trace_event_buffer_reserve
trace_event_ignore_this_pid
trace_event_raw_init
trace_event_reg
trace_handle_return
__tracepoint_dma_fence_emit
__tracepoint_xdp_exception
trace_print_symbols_seq
trace_raw_output_prep
trace_seq_printf
try_module_get
unlock_page
unmap_mapping_range
unregister_blkdev
__unregister_chrdev
unregister_chrdev_region
unregister_netdev
unregister_netdevice_notifier
unregister_netdevice_queue
unregister_pernet_subsys
unregister_pm_notifier
unregister_shrinker
up_read
usb_add_gadget_udc
usb_add_hcd
usb_create_hcd
usb_create_shared_hcd
usb_del_gadget_udc
usb_disabled
usb_ep_set_maxpacket_limit
usb_gadget_giveback_request
usb_gadget_udc_reset
usb_get_dev
usb_hcd_check_unlink_urb
usb_hcd_giveback_urb
usb_hcd_is_primary_hcd
usb_hcd_link_urb_to_ep
usb_hcd_poll_rh_status
usb_hcd_resume_root_hub
usb_hcd_unlink_urb_from_ep
usb_put_dev
usb_put_hcd
usb_remove_hcd
usleep_range
vmalloc_to_page
vmap
vm_get_page_prot
vm_insert_mixed
vm_insert_pfn
vunmap
wait_for_completion
wait_for_completion_killable
wait_woken
__wake_up
wake_up_process
__warn_printk
wiphy_free
wiphy_new_nm
wiphy_register
wiphy_unregister
woken_wake_function
ww_mutex_lock
ww_mutex_lock_interruptible
ww_mutex_unlock
xdp_do_flush_map
xdp_do_redirect
xdp_return_frame
xdp_return_frame_rx_napi
xdp_rxq_info_reg
xdp_rxq_info_reg_mem_model
xdp_rxq_info_unreg

View file

@ -486,6 +486,18 @@ config ARCH_SUPPORTS_LTO_CLANG
- compiling inline assembly with clang's integrated assembler,
- and linking with LLD.
config ARCH_SUPPORTS_THINLTO
bool
help
An architecture should select this if it supports clang's ThinLTO.
config THINLTO
bool "Use clang ThinLTO (EXPERIMENTAL)"
depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO
default y
help
Use ThinLTO to speed up Link Time Optimization.
choice
prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
default LTO_NONE
@ -542,6 +554,39 @@ config CFI_CLANG_SHADOW
If you select this option, the kernel builds a fast look-up table of
CFI check functions in loaded modules to reduce overhead.
config ARCH_SUPPORTS_SHADOW_CALL_STACK
bool
help
An architecture should select this if it supports Clang's Shadow
Call Stack, has asm/scs.h, and implements runtime support for shadow
stack switching.
config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
shadow stack to protect function return addresses from being
overwritten by an attacker. More information can be found from
Clang's documentation:
https://clang.llvm.org/docs/ShadowCallStack.html
Note that security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of shadow
stacks used by other tasks and interrupt handlers in memory, which
means an attacker capable of reading and writing arbitrary memory may
be able to locate them and hijack control flow by modifying shadow
stacks that are not currently in use.
config SHADOW_CALL_STACK_VMAP
bool "Use virtually mapped shadow call stacks"
depends on SHADOW_CALL_STACK
help
Use virtually mapped shadow call stacks. Selecting this option
provides better stack exhaustion protection, but increases per-thread
memory consumption as a full page is allocated for each shadow stack.
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help

View file

@ -70,7 +70,7 @@ config ARM
select HAVE_EXIT_THREAD
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) && (CC_IS_GCC || CLANG_VERSION >= 100000)
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))

View file

@ -44,7 +44,10 @@ KBUILD_CFLAGS += $(call cc-option,-mno-unaligned-access)
endif
ifeq ($(CONFIG_FRAME_POINTER),y)
KBUILD_CFLAGS +=-fno-omit-frame-pointer $(call cc-option,-mapcs,) $(call cc-option,-mno-sched-prolog,)
KBUILD_CFLAGS +=-fno-omit-frame-pointer
ifeq ($(CONFIG_CC_IS_GCC),y)
KBUILD_CFLAGS += $(call cc-option,-mapcs,) $(call cc-option,-mno-sched-prolog,)
endif
endif
ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
@ -120,6 +123,10 @@ ifeq ($(CONFIG_ARM_UNWIND),y)
CFLAGS_ABI +=-funwind-tables
endif
ifeq ($(CONFIG_CC_IS_CLANG),y)
CFLAGS_ABI += -meabi gnu
endif
# Accept old syntax despite ".syntax unified"
AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)

View file

@ -700,7 +700,6 @@ CONFIG_TEST_USER_COPY=m
CONFIG_MEMTEST=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_PANIC_ON_DATA_CORRUPTION=y
CONFIG_PID_IN_CONTEXTIDR=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
CONFIG_CORESIGHT_SOURCE_ETM4X=y

View file

@ -5,7 +5,7 @@
# Copyright (C) 1995-2000 Russell King
#
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
lib-y := changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
memmove.o memset.o setbit.o \
@ -19,6 +19,12 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
copy_from_user.o copy_to_user.o
ifdef CONFIG_CC_IS_CLANG
lib-y += backtrace-clang.o
else
lib-y += backtrace.o
endif
# using lib_ here won't override already available weak symbols
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o

View file

@ -0,0 +1,217 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/lib/backtrace-clang.S
*
* Copyright (C) 2019 Nathan Huckleberry
*
*/
#include <linux/kern_levels.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
/* fp is 0 or stack frame */
#define frame r4
#define sv_fp r5
#define sv_pc r6
#define mask r7
#define sv_lr r8
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
ret lr
ENDPROC(c_backtrace)
#else
/*
* Clang does not store pc or sp in function prologues so we don't know exactly
* where the function starts.
*
* We can treat the current frame's lr as the saved pc and the preceding
* frame's lr as the current frame's lr, but we can't trace the most recent
* call. Inserting a false stack frame allows us to reference the function
* called last in the stacktrace.
*
* If the call instruction was a bl we can look at the caller's branch
* instruction to calculate the saved pc. We can recover the pc in most cases,
* but in cases such as calling function pointers we cannot. In this case,
* default to using the lr. This will be some address in the function, but will
* not be the function start.
*
* Unfortunately due to the stack frame layout we can't dump r0 - r3, but these
* are less frequently saved.
*
* Stack frame layout:
* <larger addresses>
* saved lr
* frame=> saved fp
* optionally saved caller registers (r4 - r10)
* optionally saved arguments (r0 - r3)
* <top of stack frame>
* <smaller addresses>
*
* Functions start with the following code sequence:
* corrected pc => stmfd sp!, {..., fp, lr}
* add fp, sp, #x
* stmfd sp!, {r0 - r3} (optional)
*
*
*
*
*
*
* The diagram below shows an example stack setup for dump_stack.
*
* The frame for c_backtrace has pointers to the code of dump_stack. This is
* why the frame of c_backtrace is used for the pc calculation of
* dump_stack. This is why we must move back a frame to print dump_stack.
*
* The stored locals for dump_stack are in dump_stack's frame. This means that
* to fully print dump_stack's frame we need both the frame for dump_stack (for
* locals) and the frame that was called by dump_stack (for pc).
*
* To print locals we must know where the function start is. If we read the
* function prologue opcodes we can determine which variables are stored in the
* stack frame.
*
* To find the function start of dump_stack we can look at the stored LR of
* show_stack. It points at the instruction directly after the bl dump_stack.
* We can then read the offset from the bl opcode to determine where the branch
* takes us. The address calculated must be the start of dump_stack.
*
* c_backtrace frame dump_stack:
* {[LR] } ============| ...
* {[FP] } =======| | bl c_backtrace
* | |=> ...
* {[R4-R10]} |
* {[R0-R3] } | show_stack:
* dump_stack frame | ...
* {[LR] } =============| bl dump_stack
* {[FP] } <=======| |=> ...
* {[R4-R10]}
* {[R0-R3] }
*/
stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register
@ to ensure 8 byte alignment
movs frame, r0 @ if frame pointer is zero
beq no_frame @ we have no stack frames
tst r1, #0x10 @ 26 or 32-bit mode?
moveq mask, #0xfc000003
movne mask, #0 @ mask for 32-bit
/*
* Switches the current frame to be the frame for dump_stack.
*/
add frame, sp, #24 @ switch to false frame
for_each_frame: tst frame, mask @ Check for address exceptions
bne no_frame
/*
* sv_fp is the stack frame with the locals for the current considered
* function.
*
* sv_pc is the saved lr from the frame above. This is a pointer to a code
* address within the current considered function, but it is not the function
* start. This value gets updated to be the function start later if it is
* possible.
*/
1001: ldr sv_pc, [frame, #4] @ get saved 'pc'
1002: ldr sv_fp, [frame, #0] @ get saved fp
teq sv_fp, mask @ make sure next frame exists
beq no_frame
/*
* sv_lr is the lr from the function that called the current function. This is
* a pointer to a code address in the current function's caller. sv_lr-4 is
* the instruction used to call the current function.
*
* This sv_lr can be used to calculate the function start if the function was
* called using a bl instruction. If the function start can be recovered sv_pc
* is overwritten with the function start.
*
* If the current function was called using a function pointer we cannot
* recover the function start and instead continue with sv_pc as an arbitrary
* value within the current function. If this is the case we cannot print
* registers for the current function, but the stacktrace is still printed
* properly.
*/
1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame
ldr r0, [sv_lr, #-4] @ get call instruction
ldr r3, .Lopcode+4
and r2, r3, r0 @ is this a bl call
teq r2, r3
bne finished_setup @ give up if it's not
and r0, #0xffffff @ get call offset 24-bit int
lsl r0, r0, #8 @ sign extend offset
asr r0, r0, #8
ldr sv_pc, [sv_fp, #4] @ get lr address
add sv_pc, sv_pc, #-4 @ get call instruction address
add sv_pc, sv_pc, #8 @ take care of prefetch
add sv_pc, sv_pc, r0, lsl #2@ find function start
finished_setup:
bic sv_pc, sv_pc, mask @ mask PC/LR for the mode
/*
* Print the function (sv_pc) and where it was called from (sv_lr).
*/
1004: mov r0, sv_pc
mov r1, sv_lr
mov r2, frame
bic r1, r1, mask @ mask PC/LR for the mode
bl dump_backtrace_entry
/*
* Test if the function start is a stmfd instruction to determine which
* registers were stored in the function prologue.
*
* If we could not recover the sv_pc because we were called through a function
* pointer the comparison will fail and no registers will print. Unwinding will
* continue as if there had been no registers stored in this frame.
*/
1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr}
ldr r3, .Lopcode @ instruction exists,
teq r3, r1, lsr #11
ldr r0, [frame] @ locals are stored in
@ the preceding frame
subeq r0, r0, #4
bleq dump_backtrace_stm @ dump saved registers
/*
* Stop unwinding if we are out of frames or if the next frame is invalid.
*/
teq sv_fp, #0 @ zero saved fp means
beq no_frame @ no further frames
cmp sv_fp, frame @ next frame must be
mov frame, sv_fp @ above the current frame
bhi for_each_frame
1006: adr r0, .Lbad
mov r1, frame
bl printk
no_frame: ldmfd sp!, {r4 - r9, fp, pc}
ENDPROC(c_backtrace)
.pushsection __ex_table,"a"
.align 3
.long 1001b, 1006b
.long 1002b, 1006b
.long 1003b, 1006b
.long 1004b, 1006b
.long 1005b, 1006b
.popsection
.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n"
.align
.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr}
.word 0x0b000000 @ bl if these bits are set
#endif

View file

@ -58,6 +58,8 @@ config ARM64
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_THINLTO
select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
@ -131,7 +133,7 @@ config ARM64
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
@ -917,6 +919,10 @@ config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
# Supported by clang >= 7.0
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
@ -1073,6 +1079,15 @@ config PRINT_VMEMLAYOUT
If unsure, say N.
config ARM64_TAGGED_ADDR_ABI
bool "Enable the tagged user addresses syscall ABI"
default y
help
When this option is enabled, user applications can opt in to a
relaxed ABI via prctl() allowing tagged addresses to be passed
to system calls as pointer arguments. For details, see
Documentation/arm64/tagged-address-abi.rst.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT

View file

@ -62,6 +62,10 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
KBUILD_CFLAGS += -ffixed-x18
endif
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__

View file

@ -8,6 +8,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_PSI=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_IKHEADERS=m
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
@ -28,6 +29,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_FHANDLE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT_ALWAYS_ON=y
# CONFIG_RSEQ is not set
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
@ -49,12 +51,10 @@ CONFIG_ARMV8_DEPRECATED=y
CONFIG_SWP_EMULATION=y
CONFIG_CP15_BARRIER_EMULATION=y
CONFIG_SETEND_EMULATION=y
CONFIG_ARM64_SW_TTBR0_PAN=y
CONFIG_RANDOMIZE_BASE=y
# CONFIG_DMI is not set
CONFIG_COMPAT=y
CONFIG_PM_WAKELOCKS=y
CONFIG_PM_WAKELOCKS_LIMIT=0
# CONFIG_PM_WAKELOCKS_GC is not set
CONFIG_ENERGY_MODEL=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_CPUIDLE=y
@ -63,6 +63,7 @@ CONFIG_CPU_FREQ_TIMES=y
CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPUFREQ_DUMMY=m
CONFIG_ARM_SCPI_CPUFREQ=y
CONFIG_ARM_SCMI_CPUFREQ=y
CONFIG_ARM_SCMI_PROTOCOL=y
@ -71,14 +72,18 @@ CONFIG_ARM_SCPI_PROTOCOL=y
# CONFIG_ARM_SCPI_POWER_DOMAIN is not set
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_AES_ARM64=y
CONFIG_CRYPTO_SHA2_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_KPROBES=y
CONFIG_LTO_CLANG=y
CONFIG_CFI_CLANG=y
CONFIG_SHADOW_CALL_STACK=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_GKI_HACKS_TO_FIX=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_BINFMT_MISC=m
CONFIG_CMA=y
CONFIG_CMA_AREAS=16
CONFIG_ZSMALLOC=y
@ -187,8 +192,11 @@ CONFIG_NET_CLS_BPF=y
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_U32=y
CONFIG_NET_CLS_ACT=y
CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_BPF_JIT=y
CONFIG_CAN=m
CONFIG_CAN_VCAN=m
CONFIG_BT=y
CONFIG_CFG80211=y
# CONFIG_CFG80211_DEFAULT_PS is not set
@ -197,14 +205,17 @@ CONFIG_MAC80211=y
# CONFIG_MAC80211_RC_MINSTREL is not set
CONFIG_RFKILL=y
# CONFIG_UEVENT_HELPER is not set
# CONFIG_FW_CACHE is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_DEBUG_DEVRES=y
CONFIG_DMA_CMA=y
CONFIG_GNSS=y
CONFIG_GNSS_CMDLINE_SERIAL=m
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_BLK=m
CONFIG_UID_SYS_STATS=y
CONFIG_SCSI=y
# CONFIG_SCSI_PROC_FS is not set
@ -214,6 +225,7 @@ CONFIG_SCSI_UFSHCD_PLATFORM=y
CONFIG_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=y
CONFIG_DM_SNAPSHOT=y
CONFIG_DM_UEVENT=y
CONFIG_DM_VERITY=y
CONFIG_DM_VERITY_AVB=y
@ -221,7 +233,7 @@ CONFIG_DM_VERITY_FEC=y
CONFIG_DM_BOW=y
CONFIG_NETDEVICES=y
CONFIG_TUN=y
CONFIG_VIRTIO_NET=y
CONFIG_VIRTIO_NET=m
# CONFIG_ETHERNET is not set
CONFIG_PHYLIB=y
CONFIG_PPP=y
@ -255,9 +267,10 @@ CONFIG_USB_USBNET=y
# CONFIG_WLAN_VENDOR_TI is not set
# CONFIG_WLAN_VENDOR_ZYDAS is not set
# CONFIG_WLAN_VENDOR_QUANTENNA is not set
CONFIG_VIRT_WIFI=y
CONFIG_VIRT_WIFI=m
CONFIG_INPUT_FF_MEMLESS=y
CONFIG_INPUT_EVDEV=y
CONFIG_KEYBOARD_GPIO=y
# CONFIG_INPUT_MOUSE is not set
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_MISC=y
@ -273,9 +286,9 @@ CONFIG_SERIAL_OF_PLATFORM=m
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_DEV_BUS=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_HW_RANDOM_VIRTIO=m
# CONFIG_HW_RANDOM_CAVIUM is not set
# CONFIG_DEVPORT is not set
# CONFIG_I2C_COMPAT is not set
@ -293,13 +306,14 @@ CONFIG_DEVFREQ_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_MFD_ACT8945A=y
CONFIG_MFD_SYSCON=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_MEDIA_SUPPORT=y
CONFIG_MEDIA_CAMERA_SUPPORT=y
CONFIG_MEDIA_CONTROLLER=y
# CONFIG_VGA_ARB is not set
CONFIG_DRM=y
# CONFIG_DRM_FBDEV_EMULATION is not set
CONFIG_DRM_VIRTIO_GPU=y
CONFIG_DRM_VIRTIO_GPU=m
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_HRTIMER=y
@ -339,14 +353,15 @@ CONFIG_LEDS_TRIGGERS=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_SYSTOHC is not set
CONFIG_RTC_DRV_TEST=m
CONFIG_RTC_DRV_PL030=y
CONFIG_RTC_DRV_PL031=y
CONFIG_DMADEVICES=y
CONFIG_UIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_PCI=m
# CONFIG_VIRTIO_PCI_LEGACY is not set
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_INPUT=m
CONFIG_VIRTIO_MMIO=m
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_STAGING=y
CONFIG_ASHMEM=y
@ -365,6 +380,7 @@ CONFIG_DEVFREQ_GOV_POWERSAVE=y
CONFIG_DEVFREQ_GOV_USERSPACE=y
CONFIG_DEVFREQ_GOV_PASSIVE=y
CONFIG_EXTCON=y
CONFIG_IIO=y
CONFIG_PWM=y
CONFIG_QCOM_PDC=y
CONFIG_GENERIC_PHY=y
@ -376,6 +392,8 @@ CONFIG_EXT4_FS_SECURITY=y
CONFIG_F2FS_FS=y
CONFIG_F2FS_FS_SECURITY=y
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_VERITY=y
CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
# CONFIG_DNOTIFY is not set
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
@ -389,6 +407,55 @@ CONFIG_SDCARD_FS=y
CONFIG_PSTORE=y
CONFIG_PSTORE_CONSOLE=y
CONFIG_PSTORE_RAM=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_737=y
CONFIG_NLS_CODEPAGE_775=y
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_CODEPAGE_852=y
CONFIG_NLS_CODEPAGE_855=y
CONFIG_NLS_CODEPAGE_857=y
CONFIG_NLS_CODEPAGE_860=y
CONFIG_NLS_CODEPAGE_861=y
CONFIG_NLS_CODEPAGE_862=y
CONFIG_NLS_CODEPAGE_863=y
CONFIG_NLS_CODEPAGE_864=y
CONFIG_NLS_CODEPAGE_865=y
CONFIG_NLS_CODEPAGE_866=y
CONFIG_NLS_CODEPAGE_869=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_CODEPAGE_950=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_CODEPAGE_949=y
CONFIG_NLS_CODEPAGE_874=y
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_CODEPAGE_1250=y
CONFIG_NLS_CODEPAGE_1251=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_2=y
CONFIG_NLS_ISO8859_3=y
CONFIG_NLS_ISO8859_4=y
CONFIG_NLS_ISO8859_5=y
CONFIG_NLS_ISO8859_6=y
CONFIG_NLS_ISO8859_7=y
CONFIG_NLS_ISO8859_9=y
CONFIG_NLS_ISO8859_13=y
CONFIG_NLS_ISO8859_14=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_KOI8_R=y
CONFIG_NLS_KOI8_U=y
CONFIG_NLS_MAC_ROMAN=y
CONFIG_NLS_MAC_CELTIC=y
CONFIG_NLS_MAC_CENTEURO=y
CONFIG_NLS_MAC_CROATIAN=y
CONFIG_NLS_MAC_CYRILLIC=y
CONFIG_NLS_MAC_GAELIC=y
CONFIG_NLS_MAC_GREEK=y
CONFIG_NLS_MAC_ICELAND=y
CONFIG_NLS_MAC_INUIT=y
CONFIG_NLS_MAC_ROMANIAN=y
CONFIG_NLS_MAC_TURKISH=y
CONFIG_NLS_UTF8=y
CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
@ -396,11 +463,9 @@ CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CRYPTO_ADIANTUM=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_LZ4=y
CONFIG_CRYPTO_ZSTD=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRC_CCITT=y
CONFIG_CRC8=y
CONFIG_XZ_DEC=y
@ -415,7 +480,6 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
# CONFIG_DETECT_HUNG_TASK is not set
CONFIG_PANIC_TIMEOUT=5
CONFIG_SCHEDSTATS=y
CONFIG_FUNCTION_TRACER=y
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_STM=y

View file

@ -730,7 +730,6 @@ CONFIG_TEST_USER_COPY=m
CONFIG_MEMTEST=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_PANIC_ON_DATA_CORRUPTION=y
CONFIG_PID_IN_CONTEXTIDR=y
CONFIG_ARM64_STRICT_BREAK_BEFORE_MAKE=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y

View file

@ -779,7 +779,6 @@ CONFIG_TEST_USER_COPY=m
CONFIG_MEMTEST=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_PANIC_ON_DATA_CORRUPTION=y
CONFIG_PID_IN_CONTEXTIDR=y
CONFIG_ARM64_STRICT_BREAK_BEFORE_MAKE=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y

View file

@ -779,7 +779,6 @@ CONFIG_TEST_USER_COPY=m
CONFIG_MEMTEST=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_PANIC_ON_DATA_CORRUPTION=y
CONFIG_PID_IN_CONTEXTIDR=y
CONFIG_ARM64_STRICT_BREAK_BEFORE_MAKE=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y

View file

@ -745,7 +745,6 @@ CONFIG_TEST_USER_COPY=m
CONFIG_MEMTEST=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_PANIC_ON_DATA_CORRUPTION=y
CONFIG_PID_IN_CONTEXTIDR=y
CONFIG_ARM64_STRICT_BREAK_BEFORE_MAKE=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y

View file

@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
#define ALTINSTR_ENTRY(feature,cb) \
#define ALTINSTR_ENTRY(feature) \
" .word 661b - .\n" /* label */ \
" .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
" .else\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
#define ALTINSTR_ENTRY_CB(feature,cb) \
" .word 661b - .\n" /* label */ \
" .word " __stringify(cb) "- .\n" /* callback */ \
" .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@ -62,15 +65,14 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
*
* Alternatives with callbacks do not generate replacement instructions.
*/
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature,cb) \
ALTINSTR_ENTRY(feature) \
".popsection\n" \
" .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@ -78,17 +80,25 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
".else\n\t" \
".endif\n"
#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY_CB(feature,cb) \
".popsection\n" \
"663:\n\t" \
"664:\n\t" \
".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
#define ALTERNATIVE_CB(oldinstr, cb) \
__ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
__ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>

View file

@ -78,10 +78,9 @@ alternative_else_nop_endif
/*
* Remove the address tag from a virtual address, if present.
*/
.macro clear_address_tag, dst, addr
tst \addr, #(1 << 55)
bic \dst, \addr, #(0xff << 56)
csel \dst, \dst, \addr, eq
.macro untagged_addr, dst, addr
sbfx \dst, \addr, #0, #56
and \dst, \dst, \addr
.endm
#endif

View file

@ -219,21 +219,31 @@ static inline unsigned long kaslr_offset(void)
* up with a tagged userland pointer. Clear the tag to get a sane pointer to
* pass on to access_ok(), for instance.
*/
#define untagged_addr(addr) \
((__typeof__(addr))sign_extend64((u64)(addr), 55))
#define __untagged_addr(addr) \
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
/*
 * untagged_addr(addr) - strip the tag from a user address.
 *
 * The value is ANDed with its own sign-extension from bit 55 (see
 * __untagged_addr()). When bit 55 is clear the sign-extension has bits
 * 63:56 clear, so the AND removes the tag byte; when bit 55 is set the
 * sign-extension has bits 63:56 set, so the AND leaves the value untouched.
 *
 * The argument is parenthesized before the cast so that expression
 * arguments (e.g. "ptr + 1") are evaluated in their own type first; without
 * the parentheses the cast would bind to the first operand only.
 *
 * Evaluates to the untagged value, cast back to the type of @addr.
 */
#define untagged_addr(addr)	({					\
	u64 __addr = (__force u64)(addr);				\
	__addr &= __untagged_addr(__addr);				\
	(__force __typeof__(addr))__addr;				\
})
#ifdef CONFIG_KASAN_SW_TAGS
#define __tag_shifted(tag) ((u64)(tag) << 56)
#define __tag_set(addr, tag) (__typeof__(addr))( \
((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag))
#define __tag_reset(addr) untagged_addr(addr)
#define __tag_reset(addr) __untagged_addr(addr)
#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
#else
#define __tag_set(addr, tag) (addr)
#define __tag_shifted(tag) 0UL
#define __tag_reset(addr) (addr)
#define __tag_get(addr) 0
#endif
/*
 * Replace the tag bits of @addr with @tag.
 *
 * The bits selected by __tag_shifted(0xff) are cleared from the address and
 * __tag_shifted(tag) is ORed in. Returns the resulting pointer.
 */
static inline const void *__tag_set(const void *addr, u8 tag)
{
	u64 cleared = (u64)addr & ~__tag_shifted(0xff);
	u64 retagged = cleared | __tag_shifted(tag);

	return (const void *)retagged;
}
/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
@ -304,6 +314,22 @@ static inline void *phys_to_virt(phys_addr_t x)
#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
* With non-canonical CFI jump tables, the compiler replaces function
* address references with the address of the function's CFI jump
* table entry. This results in __pa_symbol(function) returning the
* physical address of the jump table entry, which can lead to address
* space confusion since the jump table points to the function's
* virtual address. Therefore, use inline assembly to ensure we are
* always taking the address of the actual function.
*
* @x is stringified and pasted into the adrp/add instruction pair, so it
* should be a symbol name rather than an arbitrary C expression. The
* address materialized by the asm is then passed to __pa_symbol(), whose
* result is the value of the macro.
*/
#define __pa_function(x) ({ \
unsigned long addr; \
asm("adrp %0, " __stringify(x) "\n\t" \
"add %0, %0, :lo12:" __stringify(x) : "=r" (addr)); \
__pa_symbol(addr); \
})
/*
* virt_to_page(k) convert a _valid_ virtual address to struct page *
* virt_addr_valid(k) indicates whether a virtual address is valid
@ -320,8 +346,9 @@ static inline void *phys_to_virt(phys_addr_t x)
#define page_to_virt(page) ({ \
unsigned long __addr = \
((__page_to_voff(page)) | PAGE_OFFSET); \
__addr = __tag_set(__addr, page_kasan_tag(page)); \
((void *)__addr); \
const void *__addr_tag = \
__tag_set((void *)__addr, page_kasan_tag(page)); \
((void *)__addr_tag); \
})
#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))

View file

@ -155,7 +155,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
phys_addr_t pgd_phys = virt_to_phys(pgdp);
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
replace_phys = (void *)__pa_function(idmap_cpu_replace_ttbr1);
cpu_install_idmap();
replace_phys(pgd_phys);

View file

@ -294,6 +294,14 @@ extern void __init minsigstksz_setup(void);
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
long set_tagged_addr_ctrl(unsigned long arg);
long get_tagged_addr_ctrl(void);
#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg)
#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl()
#endif
/*
* For CONFIG_GCC_PLUGIN_STACKLEAK
*

View file

@ -0,0 +1,37 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SCS_H
#define _ASM_SCS_H
#ifndef __ASSEMBLY__
#include <linux/scs.h>
#ifdef CONFIG_SHADOW_CALL_STACK
extern void scs_init_irq(void);
/*
 * Read the current value of x18 and record it for @tsk through
 * task_set_scs().
 */
static __always_inline void scs_save(struct task_struct *tsk)
{
	void *scs_ptr;

	asm volatile("mov %0, x18" : "=r" (scs_ptr));
	task_set_scs(tsk, scs_ptr);
}
/* Panic when scs_corrupted() reports a problem with @tsk's shadow stack. */
static inline void scs_overflow_check(struct task_struct *tsk)
{
	/* Common case: nothing wrong, nothing to do. */
	if (likely(!scs_corrupted(tsk)))
		return;

	panic("corrupted shadow stack detected inside scheduler\n");
}
#else /* CONFIG_SHADOW_CALL_STACK */
/*
 * CONFIG_SHADOW_CALL_STACK is not set: provide empty inline versions so
 * callers need no #ifdef of their own.
 */
static inline void scs_init_irq(void) {}
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}
#endif /* CONFIG_SHADOW_CALL_STACK */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_SCS_H */

View file

@ -54,6 +54,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
#ifdef CONFIG_SHADOW_CALL_STACK
DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
#endif
static inline bool on_irq_stack(unsigned long sp,
struct stack_info *info)
{

View file

@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
#define NR_CTX_REGS 12
#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12
/*

View file

@ -44,6 +44,9 @@ struct thread_info {
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
int preempt_count; /* 0 => preemptable, <0 => bug */
#ifdef CONFIG_SHADOW_CALL_STACK
void *shadow_call_stack;
#endif
};
#define thread_saved_pc(tsk) \
@ -90,6 +93,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_SVE 23 /* Scalable Vector Extension in use */
#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
#define TIF_SSBD 25 /* Wants SSB mitigation */
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)

View file

@ -76,6 +76,15 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
{
unsigned long ret, limit = current_thread_info()->addr_limit;
/*
* Asynchronous I/O running in a kernel thread does not have the
* TIF_TAGGED_ADDR flag of the process owning the mm, so always untag
* the user address before checking.
*/
if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
(current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR)))
addr = untagged_addr(addr);
__chk_user_ptr(addr);
asm volatile(
// A + B <= C + 1 for all A,B,C, in four easy steps:
@ -229,7 +238,8 @@ static inline void uaccess_enable_not_uao(void)
/*
* Sanitise a uaccess pointer such that it becomes NULL if above the
* current addr_limit.
* current addr_limit. In case the pointer is tagged (has the top byte set),
* untag the pointer before checking.
*/
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@ -237,10 +247,11 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
void __user *safe_ptr;
asm volatile(
" bics xzr, %1, %2\n"
" bics xzr, %3, %2\n"
" csel %0, %1, xzr, eq\n"
: "=&r" (safe_ptr)
: "r" (ptr), "r" (current_thread_info()->addr_limit)
: "r" (ptr), "r" (current_thread_info()->addr_limit),
"r" (untagged_addr(ptr))
: "cc");
csdb();

View file

@ -58,6 +58,7 @@ arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_CRASH_CORE) += crash_core.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
arm64-obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)

View file

@ -145,7 +145,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
static void __apply_alternatives(void *alt_region, bool is_module)
static void __nocfi __apply_alternatives(void *alt_region, bool is_module)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;

View file

@ -44,6 +44,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
#ifdef CONFIG_SHADOW_CALL_STACK
DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
BLANK();

View file

@ -45,11 +45,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return
1: mov x18, x1 // entry
1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
br x18
br x8
ENDPROC(__cpu_soft_restart)
.popsection

View file

@ -25,7 +25,7 @@ static inline void __noreturn cpu_soft_restart(unsigned long entry,
unsigned long el2_switch = !is_kernel_in_hyp_mode() &&
is_hyp_mode_available();
restart = (void *)__pa_symbol(__cpu_soft_restart);
restart = (void *)__pa_function(__cpu_soft_restart);
cpu_install_idmap();
restart(el2_switch, entry, arg0, arg1, arg2);

View file

@ -974,7 +974,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
if (kpti_applied)
return;
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
remap_fn = (void *)__pa_function(idmap_kpti_install_ng_mappings);
cpu_install_idmap();
remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));

View file

@ -37,5 +37,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
0: b efi_handle_corrupted_x18 // tail call
0:
#ifdef CONFIG_SHADOW_CALL_STACK
/*
* Restore x18 before returning to instrumented code. This is
* safe because the wrapper is called with preemption disabled and
* a separate shadow stack is used for interrupts.
*/
mov x18, x2
#endif
b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)

View file

@ -183,6 +183,10 @@ alternative_cb_end
apply_ssbd 1, x22, x23
#ifdef CONFIG_SHADOW_CALL_STACK
ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
#endif
.else
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
@ -274,6 +278,12 @@ alternative_else_nop_endif
ct_user_enter
.endif
#ifdef CONFIG_SHADOW_CALL_STACK
.if \el == 0
str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
.endif
#endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@ -367,6 +377,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
.macro irq_stack_entry
mov x19, sp // preserve the original sp
#ifdef CONFIG_SHADOW_CALL_STACK
mov x20, x18 // preserve the original shadow stack
#endif
/*
* Compare sp with the base of the task stack.
@ -384,15 +397,24 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
/* switch to the irq stack */
mov sp, x26
#ifdef CONFIG_SHADOW_CALL_STACK
/* also switch to the irq shadow stack */
ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
#endif
9998:
.endm
/*
* x19 should be preserved between irq_stack_entry and
* irq_stack_exit.
* The callee-saved regs (x19-x29) should be preserved between
* irq_stack_entry and irq_stack_exit.
*/
.macro irq_stack_exit
mov sp, x19
#ifdef CONFIG_SHADOW_CALL_STACK
mov x18, x20
#endif
.endm
/*
@ -568,7 +590,7 @@ el1_da:
*/
mrs x3, far_el1
inherit_daif pstate=x23, tmp=x2
clear_address_tag x0, x3
untagged_addr x0, x3
mov x2, sp // struct pt_regs
bl do_mem_abort
@ -742,7 +764,7 @@ el0_da:
mrs x26, far_el1
enable_daif
ct_user_exit
clear_address_tag x0, x26
untagged_addr x0, x26
mov x1, x25
mov x2, sp
bl do_mem_abort
@ -1071,6 +1093,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
#ifdef CONFIG_SHADOW_CALL_STACK
str x18, [x0, #TSK_TI_SCS]
ldr x18, [x1, #TSK_TI_SCS]
str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)

View file

@ -37,6 +37,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@ -419,6 +420,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp
#ifdef CONFIG_SHADOW_CALL_STACK
adr_l x18, init_shadow_call_stack // Set shadow call stack
#endif
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
@ -722,6 +727,10 @@ __secondary_switched:
mov sp, x1
ldr x2, [x0, #CPU_BOOT_TASK]
msr sp_el0, x2
#ifdef CONFIG_SHADOW_CALL_STACK
ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel

View file

@ -30,6 +30,7 @@
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <asm/vmap_stack.h>
#include <asm/scs.h>
unsigned long irq_err_count;
@ -72,6 +73,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");

View file

@ -30,6 +30,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/delay.h>
@ -49,6 +50,7 @@
#include <trace/events/power.h>
#include <linux/percpu.h>
#include <linux/thread_info.h>
#include <linux/prctl.h>
#include <asm/alternative.h>
#include <asm/compat.h>
@ -57,6 +59,7 @@
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/scs.h>
#include <asm/stacktrace.h>
#ifdef CONFIG_STACKPROTECTOR
@ -341,11 +344,18 @@ static void tls_thread_flush(void)
}
}
/*
 * Clear TIF_TAGGED_ADDR on the current thread. Does nothing unless
 * CONFIG_ARM64_TAGGED_ADDR_ABI is enabled.
 */
static void flush_tagged_addr_state(void)
{
	if (!IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI))
		return;

	clear_thread_flag(TIF_TAGGED_ADDR);
}
/*
 * Run the FPSIMD, TLS, ptrace hardware-breakpoint and tagged-address
 * flush helpers, in that order.
 */
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
/* Clears TIF_TAGGED_ADDR when the tagged address ABI is configured. */
flush_tagged_addr_state();
}
void release_thread(struct task_struct *dead_task)
@ -526,6 +536,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
scs_overflow_check(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@ -616,3 +627,70 @@ void __used stackleak_check_alloca(unsigned long size)
}
EXPORT_SYMBOL(stackleak_check_alloca);
#endif
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/*
* Control the relaxed ABI allowing tagged user addresses into the kernel.
*/
static unsigned int tagged_addr_disabled;
/*
 * Set or clear TIF_TAGGED_ADDR for the current thread according to the
 * PR_TAGGED_ADDR_ENABLE bit of @arg.
 *
 * Returns 0 on success. Returns -EINVAL for compat tasks, when @arg has
 * any bit other than PR_TAGGED_ADDR_ENABLE set, or when enabling is
 * requested while tagged_addr_disabled is non-zero.
 */
long set_tagged_addr_ctrl(unsigned long arg)
{
	unsigned long enable = arg & PR_TAGGED_ADDR_ENABLE;

	if (is_compat_task())
		return -EINVAL;

	if (arg & ~PR_TAGGED_ADDR_ENABLE)
		return -EINVAL;

	/*
	 * The sysctl abi.tagged_addr_disabled globally forbids turning the
	 * tagged address ABI on; turning it off is still permitted.
	 */
	if (enable && tagged_addr_disabled)
		return -EINVAL;

	update_thread_flag(TIF_TAGGED_ADDR, enable);

	return 0;
}
/*
 * Report the tagged address state of the current thread.
 *
 * Returns PR_TAGGED_ADDR_ENABLE when TIF_TAGGED_ADDR is set, 0 when it is
 * clear, and -EINVAL for compat tasks.
 */
long get_tagged_addr_ctrl(void)
{
	if (is_compat_task())
		return -EINVAL;

	return test_thread_flag(TIF_TAGGED_ADDR) ? PR_TAGGED_ADDR_ENABLE : 0;
}
/*
* Global sysctl to disable the tagged user addresses support. This control
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
/* Lower and upper bounds passed to proc_dointvec_minmax() below. */
static int zero;
static int one = 1;

static struct ctl_table tagged_addr_sysctl_table[] = {
	{
		.procname	= "tagged_addr_disabled",
		.data		= &tagged_addr_disabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{ }	/* empty terminating entry */
};
/*
 * Register tagged_addr_sysctl_table under "abi".
 * Returns 0 on success, -EINVAL if register_sysctl() returns NULL.
 */
static int __init tagged_addr_init(void)
{
	return register_sysctl("abi", tagged_addr_sysctl_table) ? 0 : -EINVAL;
}
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */

View file

@ -46,7 +46,8 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu)
static int cpu_psci_cpu_boot(unsigned int cpu)
{
int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry));
int err = psci_ops.cpu_on(cpu_logical_map(cpu),
__pa_function(secondary_entry));
if (err)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);

40
arch/arm64/kernel/scs.c Normal file
View file

@ -0,0 +1,40 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Shadow Call Stack support.
*
* Copyright (C) 2019 Google LLC
*/
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <asm/scs.h>
DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
__aligned(SCS_SIZE);
#endif
/*
 * Set irq_shadow_call_stack_ptr for every possible CPU.
 *
 * With CONFIG_SHADOW_CALL_STACK_VMAP the stack comes from
 * __vmalloc_node_range() on the CPU's node; otherwise the statically
 * defined per-CPU irq_shadow_call_stack array is used.
 */
void scs_init_irq(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		unsigned long *stack;

#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
		/*
		 * NOTE(review): the allocation result is stored without a
		 * NULL check - confirm that failure here is acceptable.
		 */
		stack = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
					     VMALLOC_START, VMALLOC_END,
					     GFP_SCS, PAGE_KERNEL,
					     0, cpu_to_node(cpu),
					     __builtin_return_address(0));
#else
		stack = per_cpu(irq_shadow_call_stack, cpu);
#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */

		per_cpu(irq_shadow_call_stack_ptr, cpu) = stack;
	}
}

View file

@ -54,6 +54,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@ -353,6 +354,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();
/* Save the shadow stack pointer before exiting the idle task */
scs_save(current);
idle_task_exit();
local_daif_mask();

View file

@ -99,7 +99,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
* boot-loader's endianness before jumping. This is mandated by
* the boot protocol.
*/
writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
writeq_relaxed(__pa_function(secondary_holding_pen), release_addr);
__flush_dcache_area((__force void *)release_addr,
sizeof(*release_addr));

View file

@ -17,6 +17,8 @@ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
ccflags-y += $(DISABLE_LTO)
CFLAGS_REMOVE_vgettimeofday.o += $(CC_FLAGS_SCS)
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

View file

@ -34,3 +34,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# remove the SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

View file

@ -31,7 +31,12 @@
.text
.pushsection .hyp.text, "ax"
/*
* We treat x18 as callee-saved as the host may use it as a platform
* register (e.g. for shadow call stack).
*/
.macro save_callee_saved_regs ctxt
str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@ -41,6 +46,8 @@
.endm
.macro restore_callee_saved_regs ctxt
// We require \ctxt is not x18-x28
ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@ -57,29 +64,26 @@ ENTRY(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
// x18: guest context
// x29: guest context
// Store the host regs
save_callee_saved_regs x1
add x18, x0, #VCPU_CONTEXT
add x29, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
// Restore guest regs x19-x29, lr
restore_callee_saved_regs x18
// Restore guest reg x18
ldr x18, [x18, #CPU_XREG_OFFSET(18)]
// Restore guest regs x18-x29, lr
restore_callee_saved_regs x29
// Do not touch any register after this!
eret
@ -101,7 +105,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1
// Store the guest regs x0-x1 and x4-x18
// Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@ -110,9 +114,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
str x18, [x1, #CPU_XREG_OFFSET(18)]
// Store the guest regs x19-x29, lr
// Store the guest regs x18-x29, lr
save_callee_saved_regs x1
get_host_ctxt x2, x3

View file

@ -45,45 +45,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]
mov x18, #(PAGE_SIZE - 128)
add x0, x0, #256
add x1, x1, #128
1:
subs x18, x18, #128
tst x0, #(PAGE_SIZE - 1)
alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif
stnp x2, x3, [x0]
stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
stnp x4, x5, [x0, #16]
stnp x4, x5, [x0, #16 - 256]
ldp x4, x5, [x1, #16]
stnp x6, x7, [x0, #32]
stnp x6, x7, [x0, #32 - 256]
ldp x6, x7, [x1, #32]
stnp x8, x9, [x0, #48]
stnp x8, x9, [x0, #48 - 256]
ldp x8, x9, [x1, #48]
stnp x10, x11, [x0, #64]
stnp x10, x11, [x0, #64 - 256]
ldp x10, x11, [x1, #64]
stnp x12, x13, [x0, #80]
stnp x12, x13, [x0, #80 - 256]
ldp x12, x13, [x1, #80]
stnp x14, x15, [x0, #96]
stnp x14, x15, [x0, #96 - 256]
ldp x14, x15, [x1, #96]
stnp x16, x17, [x0, #112]
stnp x16, x17, [x0, #112 - 256]
ldp x16, x17, [x1, #112]
add x0, x0, #128
add x1, x1, #128
b.gt 1b
b.ne 1b
stnp x2, x3, [x0]
stnp x4, x5, [x0, #16]
stnp x6, x7, [x0, #32]
stnp x8, x9, [x0, #48]
stnp x10, x11, [x0, #64]
stnp x12, x13, [x0, #80]
stnp x14, x15, [x0, #96]
stnp x16, x17, [x0, #112]
stnp x2, x3, [x0, #-256]
stnp x4, x5, [x0, #16 - 256]
stnp x6, x7, [x0, #32 - 256]
stnp x8, x9, [x0, #48 - 256]
stnp x10, x11, [x0, #64 - 256]
stnp x12, x13, [x0, #80 - 256]
stnp x14, x15, [x0, #96 - 256]
stnp x16, x17, [x0, #112 - 256]
ret
ENDPROC(copy_page)

View file

@ -117,6 +117,8 @@ ENDPROC(cpu_do_idle)
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
*
* This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
@ -141,6 +143,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
/*
* Save x18 as it may be used as a platform register, e.g. by shadow
* call stack.
*/
str x18, [x0, #96]
ret
ENDPROC(cpu_do_suspend)
@ -157,6 +164,13 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
/*
* Restore x18, as it may be used as a platform register, and clear
* the buffer to minimize the risk of exposure when used for shadow
* call stack.
*/
ldr x18, [x0, #96]
str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
@ -310,15 +324,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
ldaxr w18, [flag_ptr]
eor w18, w18, num_cpus
cbnz w18, 1b
ldaxr w17, [flag_ptr]
eor w17, w17, num_cpus
cbnz w17, 1b
/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
mrs x18, sctlr_el1
bic x18, x18, #SCTLR_ELx_M
msr sctlr_el1, x18
mrs x17, sctlr_el1
bic x17, x17, #SCTLR_ELx_M
msr sctlr_el1, x17
isb
/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@ -341,9 +355,9 @@ skip_pgd:
isb
/* We're done: fire up the MMU again */
mrs x18, sctlr_el1
orr x18, x18, #SCTLR_ELx_M
msr sctlr_el1, x18
mrs x17, sctlr_el1
orr x17, x17, #SCTLR_ELx_M
msr sctlr_el1, x17
isb
/*
@ -413,33 +427,9 @@ skip_pte:
b.ne do_pte
b next_pmd
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
__idmap_cpu_set_reserved_ttbr1 x18, x17
/* Increment the flag to let the boot CPU we're ready */
1: ldxr w18, [flag_ptr]
add w18, w18, #1
stxr w17, w18, [flag_ptr]
cbnz w17, 1b
/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
ldxr w18, [flag_ptr]
cbnz w18, 1b
/* All done, act like nothing happened */
msr ttbr1_el1, swapper_ttb
isb
ret
.unreq cpu
.unreq num_cpus
.unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
.unreq cur_pgdp
.unreq end_pgdp
.unreq pgd
@ -452,6 +442,31 @@ __idmap_kpti_secondary:
.unreq cur_ptep
.unreq end_ptep
.unreq pte
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
__idmap_cpu_set_reserved_ttbr1 x16, x17
/* Increment the flag to let the boot CPU know we're ready */
1: ldxr w16, [flag_ptr]
add w16, w16, #1
stxr w17, w16, [flag_ptr]
cbnz w17, 1b
/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
ldxr w16, [flag_ptr]
cbnz w16, 1b
/* All done, act like nothing happened */
msr ttbr1_el1, swapper_ttb
isb
ret
.unreq swapper_ttb
.unreq flag_ptr
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif

View file

@ -95,7 +95,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@ -126,7 +126,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@ -170,7 +170,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@ -195,8 +195,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
is_kernel_addr(mfspr(SPRN_SDAR)))
if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
*addrp = 0;
}
@ -435,7 +434,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@ -463,8 +462,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
*/
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
is_kernel_addr(addr))
if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@ -2068,12 +2066,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type &
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
perf_get_data_addr(regs, &data.addr);
perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(cpuhw);
power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
}

View file

@ -9,10 +9,12 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_PSI=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_IKHEADERS=m
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_BPF=y
CONFIG_SCHED_AUTOGROUP=y
@ -27,6 +29,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_FHANDLE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT_ALWAYS_ON=y
# CONFIG_RSEQ is not set
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
@ -34,14 +37,14 @@ CONFIG_EMBEDDED=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_PROFILING=y
CONFIG_SMP=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_NR_CPUS=32
CONFIG_EFI=y
CONFIG_PM_WAKELOCKS=y
CONFIG_PM_WAKELOCKS_LIMIT=0
# CONFIG_PM_WAKELOCKS_GC is not set
CONFIG_CPU_FREQ_TIMES=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPUFREQ_DUMMY=m
CONFIG_IA32_EMULATION=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
@ -49,7 +52,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_GKI_HACKS_TO_FIX=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSMALLOC=y
CONFIG_NET=y
CONFIG_PACKET=y
@ -157,6 +160,7 @@ CONFIG_NET_EMATCH_U32=y
CONFIG_NET_CLS_ACT=y
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_BPF_JIT=y
CONFIG_CAN=m
# CONFIG_CAN_BCM is not set
# CONFIG_CAN_GW is not set
@ -168,8 +172,11 @@ CONFIG_MAC80211=y
# CONFIG_MAC80211_RC_MINSTREL is not set
CONFIG_RFKILL=y
# CONFIG_UEVENT_HELPER is not set
# CONFIG_FW_CACHE is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_DEBUG_DEVRES=y
CONFIG_GNSS=y
CONFIG_GNSS_CMDLINE_SERIAL=m
CONFIG_OF=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=y
@ -184,6 +191,7 @@ CONFIG_BLK_DEV_SD=y
CONFIG_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=y
CONFIG_DM_SNAPSHOT=y
CONFIG_DM_UEVENT=y
CONFIG_DM_VERITY=y
CONFIG_DM_VERITY_AVB=y
@ -227,7 +235,7 @@ CONFIG_USB_USBNET=y
# CONFIG_WLAN_VENDOR_QUANTENNA is not set
CONFIG_VIRT_WIFI=m
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
CONFIG_KEYBOARD_GPIO=y
# CONFIG_INPUT_MOUSE is not set
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_MISC=y
@ -241,6 +249,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_EXAR is not set
CONFIG_SERIAL_OF_PLATFORM=m
CONFIG_SERIAL_DEV_BUS=y
CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=m
# CONFIG_DEVPORT is not set
@ -251,6 +260,8 @@ CONFIG_GPIOLIB=y
# CONFIG_HWMON is not set
CONFIG_DEVFREQ_THERMAL=y
# CONFIG_X86_PKG_TEMP_THERMAL is not set
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_MEDIA_SUPPORT=y
CONFIG_MEDIA_CAMERA_SUPPORT=y
# CONFIG_VGA_ARB is not set
@ -287,6 +298,10 @@ CONFIG_USB_CONFIGFS_F_ACC=y
CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
CONFIG_USB_CONFIGFS_F_MIDI=y
CONFIG_MMC=m
# CONFIG_PWRSEQ_EMMC is not set
# CONFIG_PWRSEQ_SIMPLE is not set
CONFIG_MMC_SDHCI=m
CONFIG_MMC_SDHCI_PLTFM=m
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_TRIGGERS=y
@ -305,6 +320,7 @@ CONFIG_ASHMEM=y
CONFIG_ION=y
CONFIG_ION_SYSTEM_HEAP=y
CONFIG_PM_DEVFREQ=y
CONFIG_IIO=y
CONFIG_ANDROID=y
CONFIG_ANDROID_BINDER_IPC=y
CONFIG_EXT4_FS=y
@ -313,6 +329,8 @@ CONFIG_EXT4_ENCRYPTION=y
CONFIG_F2FS_FS=y
CONFIG_F2FS_FS_SECURITY=y
CONFIG_F2FS_FS_ENCRYPTION=y
CONFIG_FS_VERITY=y
CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
# CONFIG_DNOTIFY is not set
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
@ -327,13 +345,63 @@ CONFIG_SDCARD_FS=y
CONFIG_PSTORE=y
CONFIG_PSTORE_CONSOLE=y
CONFIG_PSTORE_RAM=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_737=y
CONFIG_NLS_CODEPAGE_775=y
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_CODEPAGE_852=y
CONFIG_NLS_CODEPAGE_855=y
CONFIG_NLS_CODEPAGE_857=y
CONFIG_NLS_CODEPAGE_860=y
CONFIG_NLS_CODEPAGE_861=y
CONFIG_NLS_CODEPAGE_862=y
CONFIG_NLS_CODEPAGE_863=y
CONFIG_NLS_CODEPAGE_864=y
CONFIG_NLS_CODEPAGE_865=y
CONFIG_NLS_CODEPAGE_866=y
CONFIG_NLS_CODEPAGE_869=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_CODEPAGE_950=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_CODEPAGE_949=y
CONFIG_NLS_CODEPAGE_874=y
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_CODEPAGE_1250=y
CONFIG_NLS_CODEPAGE_1251=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_2=y
CONFIG_NLS_ISO8859_3=y
CONFIG_NLS_ISO8859_4=y
CONFIG_NLS_ISO8859_5=y
CONFIG_NLS_ISO8859_6=y
CONFIG_NLS_ISO8859_7=y
CONFIG_NLS_ISO8859_9=y
CONFIG_NLS_ISO8859_13=y
CONFIG_NLS_ISO8859_14=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_KOI8_R=y
CONFIG_NLS_KOI8_U=y
CONFIG_NLS_MAC_ROMAN=y
CONFIG_NLS_MAC_CELTIC=y
CONFIG_NLS_MAC_CENTEURO=y
CONFIG_NLS_MAC_CROATIAN=y
CONFIG_NLS_MAC_CYRILLIC=y
CONFIG_NLS_MAC_GAELIC=y
CONFIG_NLS_MAC_GREEK=y
CONFIG_NLS_MAC_ICELAND=y
CONFIG_NLS_MAC_INUIT=y
CONFIG_NLS_MAC_ROMANIAN=y
CONFIG_NLS_MAC_TURKISH=y
CONFIG_NLS_UTF8=y
CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CRYPTO_ADIANTUM=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_SHA256_SSSE3=y
CONFIG_CRYPTO_AES_NI_INTEL=y
CONFIG_CRYPTO_LZ4=y
CONFIG_CRYPTO_ZSTD=y
CONFIG_CRYPTO_ANSI_CPRNG=y

View file

@ -563,9 +563,11 @@ static int bts_event_init(struct perf_event *event)
* Note that the default paranoia setting permits unprivileged
* users to profile the kernel.
*/
if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
!capable(CAP_SYS_ADMIN))
return -EACCES;
if (event->attr.exclude_kernel) {
ret = perf_allow_kernel(&event->attr);
if (ret)
return ret;
}
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;

View file

@ -3109,8 +3109,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
ret = perf_allow_cpu(&event->attr);
if (ret)
return ret;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;

View file

@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
* the user needs special permissions to be able to use it
*/
if (p4_ht_active() && p4_event_bind_map[v].shared) {
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
v = perf_allow_cpu(&event->attr);
if (v)
return v;
}
/* ESCR EventMask bits may be invalid */

View file

@ -1213,7 +1213,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
static void pt_event_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
unsigned long msr_a, msr_b;
struct perf_addr_filter_range *fr = event->addr_filter_ranges;
struct pt_filters *filters = event->hw.addr_filters;
struct perf_addr_filter *filter;
int range = 0;
@ -1222,12 +1223,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
if (filter->path.dentry && !offs[range]) {
if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
msr_a = filter->offset + offs[range];
msr_b = filter->size + msr_a - 1;
msr_a = fr[range].start;
msr_b = msr_a + fr[range].size - 1;
}
filters->filter[range].msr_a = msr_a;

View file

@ -816,6 +816,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);

View file

@ -35,6 +35,7 @@
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@ -2550,6 +2551,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/
blk_qc_t submit_bio(struct bio *bio)
{
bool workingset_read = false;
unsigned long pflags;
blk_qc_t ret;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
@ -2565,6 +2570,8 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
if (bio_flagged(bio, BIO_WORKINGSET))
workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
@ -2579,7 +2586,21 @@ blk_qc_t submit_bio(struct bio *bio)
}
}
return generic_make_request(bio);
/*
* If we're reading data that is part of the userspace
* workingset, count submission time as memory stall. When the
* device is congested, or the submitting cgroup IO-throttled,
* submission can be a significant part of overall IO time.
*/
if (workingset_read)
psi_memstall_enter(&pflags);
ret = generic_make_request(bio);
if (workingset_read)
psi_memstall_leave(&pflags);
return ret;
}
EXPORT_SYMBOL(submit_bio);

16
build.config.allmodconfig Normal file
View file

@ -0,0 +1,16 @@
DEFCONFIG=allmodconfig
# XFS_FS is currently broken on this branch with clang-9
POST_DEFCONFIG_CMDS="update_config"
# Post-defconfig hook: switch off options that do not build in the
# allmodconfig configuration, force the frame-pointer unwinder, then let
# "make olddefconfig" resolve any dependencies changed by those edits.
function update_config() {
    # No trailing backslash after the last option: a continuation there would
    # splice the subshell below onto this command line.
    ${KERNEL_DIR}/scripts/config --file ${OUT_DIR}/.config \
         -d TEST_KMOD \
         -d XFS_FS \
         -d CPU_BIG_ENDIAN \
         -d STM \
         -d TEST_MEMCAT_P \
         -e UNWINDER_FRAME_POINTER

    (cd ${OUT_DIR} && \
     make O=${OUT_DIR} $archsubarch CC=${CC} CROSS_COMPILE=${CROSS_COMPILE} olddefconfig)
}

View file

@ -0,0 +1,4 @@
. ${ROOT_DIR}/common/build.config.common
. ${ROOT_DIR}/common/build.config.aarch64
. ${ROOT_DIR}/common/build.config.allmodconfig

View file

@ -0,0 +1,4 @@
. ${ROOT_DIR}/common/build.config.common
. ${ROOT_DIR}/common/build.config.x86_64
. ${ROOT_DIR}/common/build.config.allmodconfig

View file

@ -1,9 +1,13 @@
BRANCH=android-4.19-q
BRANCH=android-4.19
KERNEL_DIR=common
CC=clang
CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r365631c/bin
LD=ld.lld
NM=llvm-nm
OBJCOPY=llvm-objcopy
CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin
EXTRA_CMDS=''
STOP_SHIP_TRACEPRINTK=1
LD=ld.lld
IN_KERNEL_MODULES=1
DO_NOT_STRIP_MODULES=1

4
build.config.gki Normal file
View file

@ -0,0 +1,4 @@
DEFCONFIG=gki_defconfig
POST_DEFCONFIG_CMDS="check_defconfig"
BUILD_INITRAMFS=1

View file

@ -1,19 +1,6 @@
ARCH=arm64
BRANCH=android-4.19
CLANG_TRIPLE=aarch64-linux-gnu-
CROSS_COMPILE=aarch64-linux-androidkernel-
CC=clang
DEFCONFIG=gki_defconfig
EXTRA_CMDS=''
KERNEL_DIR=common
POST_DEFCONFIG_CMDS="check_defconfig"
CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin
FILES="
arch/arm64/boot/Image.gz
vmlinux
System.map
"
STOP_SHIP_TRACEPRINTK=1
. ${ROOT_DIR}/common/build.config.common
. ${ROOT_DIR}/common/build.config.aarch64
. ${ROOT_DIR}/common/build.config.gki
ABI_DEFINITION=abi_gki_aarch64.xml
BUILD_INITRAMFS=1
KMI_WHITELIST=abi_gki_aarch64_whitelist

View file

@ -1,18 +1,4 @@
ARCH=x86_64
BRANCH=android-4.19
CLANG_TRIPLE=x86_64-linux-gnu-
CROSS_COMPILE=x86_64-linux-androidkernel-
CC=clang
DEFCONFIG=gki_defconfig
EXTRA_CMDS=''
KERNEL_DIR=common
POST_DEFCONFIG_CMDS="check_defconfig"
CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
FILES="
arch/x86/boot/bzImage
vmlinux
System.map
"
STOP_SHIP_TRACEPRINTK=1
BUILD_INITRAMFS=1
. ${ROOT_DIR}/common/build.config.common
. ${ROOT_DIR}/common/build.config.x86_64
. ${ROOT_DIR}/common/build.config.gki

View file

@ -20,6 +20,18 @@ config ANDROID_BINDER_IPC
Android process, using Binder to identify, invoke and pass arguments
between said processes.
config ANDROID_BINDERFS
bool "Android Binderfs filesystem"
depends on ANDROID_BINDER_IPC
default n
---help---
Binderfs is a pseudo-filesystem for the Android Binder IPC driver
which can be mounted per-ipc namespace allowing to run multiple
instances of Android.
Each binderfs mount initially only contains a binder-control device.
It can be used to dynamically allocate new binder IPC devices via
ioctls.
config ANDROID_BINDER_DEVICES
string "Android Binder devices"
depends on ANDROID_BINDER_IPC

View file

@ -1,4 +1,5 @@
ccflags-y += -I$(src) # needed for trace events
obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o
obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o
obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o

View file

@ -78,6 +78,7 @@
#include <asm/cacheflush.h>
#include "binder_alloc.h"
#include "binder_internal.h"
#include "binder_trace.h"
static HLIST_HEAD(binder_deferred_list);
@ -94,22 +95,8 @@ static struct dentry *binder_debugfs_dir_entry_root;
static struct dentry *binder_debugfs_dir_entry_proc;
static atomic_t binder_last_id;
#define BINDER_DEBUG_ENTRY(name) \
static int binder_##name##_open(struct inode *inode, struct file *file) \
{ \
return single_open(file, binder_##name##_show, inode->i_private); \
} \
\
static const struct file_operations binder_##name##_fops = { \
.owner = THIS_MODULE, \
.open = binder_##name##_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release, \
}
static int binder_proc_show(struct seq_file *m, void *unused);
BINDER_DEBUG_ENTRY(proc);
static int proc_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(proc);
/* This is only defined in include/asm-arm/sizes.h */
#ifndef SZ_1K
@ -143,7 +130,7 @@ static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR |
BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION;
module_param_named(debug_mask, binder_debug_mask, uint, 0644);
static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
module_param_named(devices, binder_devices_param, charp, 0444);
static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
@ -217,30 +204,8 @@ static inline void binder_stats_created(enum binder_stat_types type)
atomic_inc(&binder_stats.obj_created[type]);
}
struct binder_transaction_log_entry {
int debug_id;
int debug_id_done;
int call_type;
int from_proc;
int from_thread;
int target_handle;
int to_proc;
int to_thread;
int to_node;
int data_size;
int offsets_size;
int return_error_line;
uint32_t return_error;
uint32_t return_error_param;
const char *context_name;
};
struct binder_transaction_log {
atomic_t cur;
bool full;
struct binder_transaction_log_entry entry[32];
};
static struct binder_transaction_log binder_transaction_log;
static struct binder_transaction_log binder_transaction_log_failed;
struct binder_transaction_log binder_transaction_log;
struct binder_transaction_log binder_transaction_log_failed;
static struct binder_transaction_log_entry *binder_transaction_log_add(
struct binder_transaction_log *log)
@ -262,20 +227,6 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
return e;
}
struct binder_context {
struct binder_node *binder_context_mgr_node;
struct mutex context_mgr_node_lock;
kuid_t binder_context_mgr_uid;
const char *name;
};
struct binder_device {
struct hlist_node hlist;
struct miscdevice miscdev;
struct binder_context context;
};
/**
* struct binder_work - work enqueued on a worklist
* @entry: node enqueued on list
@ -540,6 +491,7 @@ struct binder_priority {
* @inner_lock: can nest under outer_lock and/or node lock
* @outer_lock: no nesting under innor or node lock
* Lock order: 1) outer, 2) node, 3) inner
* @binderfs_entry: process-specific binderfs log file
*
* Bookkeeping structure for binder processes
*/
@ -571,6 +523,7 @@ struct binder_proc {
struct binder_context *context;
spinlock_t inner_lock;
spinlock_t outer_lock;
struct dentry *binderfs_entry;
};
enum {
@ -3450,7 +3403,7 @@ static void binder_transaction(struct binder_proc *proc,
binder_size_t parent_offset;
struct binder_fd_array_object *fda =
to_binder_fd_array_object(hdr);
size_t num_valid = (buffer_offset - off_start_offset) *
size_t num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
struct binder_buffer_object *parent =
binder_validate_ptr(target_proc, t->buffer,
@ -3524,7 +3477,7 @@ static void binder_transaction(struct binder_proc *proc,
t->buffer->user_data + sg_buf_offset;
sg_buf_offset += ALIGN(bp->length, sizeof(u64));
num_valid = (buffer_offset - off_start_offset) *
num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
ret = binder_fixup_parent(t, thread, bp,
off_start_offset,
@ -5230,6 +5183,8 @@ static int binder_open(struct inode *nodp, struct file *filp)
{
struct binder_proc *proc;
struct binder_device *binder_dev;
struct binderfs_info *info;
struct dentry *binder_binderfs_dir_entry_proc = NULL;
binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
current->group_leader->pid, current->pid);
@ -5251,8 +5206,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
proc->default_priority.prio = NICE_TO_PRIO(0);
}
binder_dev = container_of(filp->private_data, struct binder_device,
miscdev);
/* binderfs stashes devices in i_private */
if (is_binderfs_device(nodp)) {
binder_dev = nodp->i_private;
info = nodp->i_sb->s_fs_info;
binder_binderfs_dir_entry_proc = info->proc_log_dir;
} else {
binder_dev = container_of(filp->private_data,
struct binder_device, miscdev);
}
proc->context = &binder_dev->context;
binder_alloc_init(&proc->alloc);
@ -5280,7 +5242,36 @@ static int binder_open(struct inode *nodp, struct file *filp)
proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
binder_debugfs_dir_entry_proc,
(void *)(unsigned long)proc->pid,
&binder_proc_fops);
&proc_fops);
}
if (binder_binderfs_dir_entry_proc) {
char strbuf[11];
struct dentry *binderfs_entry;
snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
/*
* Similar to debugfs, the process specific log file is shared
* between contexts. If the file has already been created for a
* process, the following binderfs_create_file() call will
* fail with error code EEXIST if another context of the same
* process invoked binder_open(). This is ok since same as
* debugfs, the log file will contain information on all
* contexts of a given PID.
*/
binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
if (!IS_ERR(binderfs_entry)) {
proc->binderfs_entry = binderfs_entry;
} else {
int error;
error = PTR_ERR(binderfs_entry);
if (error != -EEXIST) {
pr_warn("Unable to create file %s in binderfs (error %d)\n",
strbuf, error);
}
}
}
return 0;
@ -5322,6 +5313,12 @@ static int binder_release(struct inode *nodp, struct file *filp)
struct binder_proc *proc = filp->private_data;
debugfs_remove(proc->debugfs_entry);
if (proc->binderfs_entry) {
binderfs_remove_file(proc->binderfs_entry);
proc->binderfs_entry = NULL;
}
binder_defer_work(proc, BINDER_DEFERRED_RELEASE);
return 0;
@ -5928,7 +5925,7 @@ static void print_binder_proc_stats(struct seq_file *m,
}
static int binder_state_show(struct seq_file *m, void *unused)
int binder_state_show(struct seq_file *m, void *unused)
{
struct binder_proc *proc;
struct binder_node *node;
@ -5967,7 +5964,7 @@ static int binder_state_show(struct seq_file *m, void *unused)
return 0;
}
static int binder_stats_show(struct seq_file *m, void *unused)
int binder_stats_show(struct seq_file *m, void *unused)
{
struct binder_proc *proc;
@ -5983,7 +5980,7 @@ static int binder_stats_show(struct seq_file *m, void *unused)
return 0;
}
static int binder_transactions_show(struct seq_file *m, void *unused)
int binder_transactions_show(struct seq_file *m, void *unused)
{
struct binder_proc *proc;
@ -5996,7 +5993,7 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
return 0;
}
static int binder_proc_show(struct seq_file *m, void *unused)
static int proc_show(struct seq_file *m, void *unused)
{
struct binder_proc *itr;
int pid = (unsigned long)m->private;
@ -6039,7 +6036,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
"\n" : " (incomplete)\n");
}
static int binder_transaction_log_show(struct seq_file *m, void *unused)
int binder_transaction_log_show(struct seq_file *m, void *unused)
{
struct binder_transaction_log *log = m->private;
unsigned int log_cur = atomic_read(&log->cur);
@ -6060,7 +6057,7 @@ static int binder_transaction_log_show(struct seq_file *m, void *unused)
return 0;
}
static const struct file_operations binder_fops = {
const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
@ -6071,11 +6068,6 @@ static const struct file_operations binder_fops = {
.release = binder_release,
};
BINDER_DEBUG_ENTRY(state);
BINDER_DEBUG_ENTRY(stats);
BINDER_DEBUG_ENTRY(transactions);
BINDER_DEBUG_ENTRY(transaction_log);
static int __init init_binder_device(const char *name)
{
int ret;
@ -6107,9 +6099,10 @@ static int __init init_binder_device(const char *name)
static int __init binder_init(void)
{
int ret;
char *device_name, *device_names, *device_tmp;
char *device_name, *device_tmp;
struct binder_device *device;
struct hlist_node *tmp;
char *device_names = NULL;
ret = binder_alloc_shrinker_init();
if (ret)
@ -6151,24 +6144,30 @@ static int __init binder_init(void)
&binder_transaction_log_fops);
}
/*
* Copy the module_parameter string, because we don't want to
* tokenize it in-place.
*/
device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL);
if (!device_names) {
ret = -ENOMEM;
goto err_alloc_device_names_failed;
}
strcpy(device_names, binder_devices_param);
if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) &&
strcmp(binder_devices_param, "") != 0) {
/*
* Copy the module_parameter string, because we don't want to
* tokenize it in-place.
*/
device_names = kstrdup(binder_devices_param, GFP_KERNEL);
if (!device_names) {
ret = -ENOMEM;
goto err_alloc_device_names_failed;
}
device_tmp = device_names;
while ((device_name = strsep(&device_tmp, ","))) {
ret = init_binder_device(device_name);
if (ret)
goto err_init_binder_device_failed;
device_tmp = device_names;
while ((device_name = strsep(&device_tmp, ","))) {
ret = init_binder_device(device_name);
if (ret)
goto err_init_binder_device_failed;
}
}
ret = init_binderfs();
if (ret)
goto err_init_binder_device_failed;
return ret;
err_init_binder_device_failed:

View file

@ -0,0 +1,144 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BINDER_INTERNAL_H
#define _LINUX_BINDER_INTERNAL_H
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/uidgid.h>
/**
 * struct binder_context - per-device binder context
 * @binder_context_mgr_node: node of the context manager
 *                           (NOTE(review): presumably NULL until a manager
 *                           registers - confirm in binder.c)
 * @context_mgr_node_lock:   mutex; initialised with mutex_init() when a
 *                           binderfs device is created (presumably guards
 *                           @binder_context_mgr_node - confirm in binder.c)
 * @binder_context_mgr_uid:  uid of the context manager; binderfs initialises
 *                           it to INVALID_UID
 * @name:                    name of the context; binderfs points it at the
 *                           heap copy of the device name
 */
struct binder_context {
struct binder_node *binder_context_mgr_node;
struct mutex context_mgr_node_lock;
kuid_t binder_context_mgr_uid;
const char *name;
};
/**
 * struct binder_device - information about a binder device node
 * @hlist:          list of binder devices (only used for devices requested via
 *                  CONFIG_ANDROID_BINDER_DEVICES)
 * @miscdev:        information about a binder character device node; for
 *                  binderfs devices only @miscdev.name and @miscdev.minor are
 *                  filled in by binderfs.c
 * @context:        binder context information
 * @binderfs_inode: for binderfs devices, the inode created for this device
 *                  in the binderfs mount (set by binderfs.c when the device
 *                  or binder-control node is created)
 */
struct binder_device {
struct hlist_node hlist;
struct miscdevice miscdev;
struct binder_context context;
struct inode *binderfs_inode;
};
/**
 * struct binderfs_mount_opts - mount options for binderfs
 * @max:        maximum number of allocatable binderfs binder devices; the
 *              option parser accepts 0..BINDERFS_MAX_MINOR and defaults to
 *              BINDERFS_MAX_MINOR
 * @stats_mode: enable binder stats in binderfs; holds an
 *              enum binderfs_stats_mode value (STATS_NONE by default,
 *              STATS_GLOBAL for "stats=global")
 */
struct binderfs_mount_opts {
int max;
int stats_mode;
};
/**
 * struct binderfs_info - information about a binderfs mount
 * @ipc_ns:         The ipc namespace the binderfs mount belongs to.
 * @control_dentry: This records the dentry of this binderfs mount
 *                  binder-control device.
 * @root_uid:       uid that needs to be used when a new binder device is
 *                  created.
 * @root_gid:       gid that needs to be used when a new binder device is
 *                  created.
 * @mount_opts:     The mount options in use.
 * @device_count:   The current number of allocated binder devices. Updated
 *                  by binderfs.c while holding binderfs_minors_mutex.
 * @proc_log_dir:   Pointer to the directory dentry containing process-specific
 *                  logs. Stays NULL unless the binder_logs tree is created.
 *
 * Stored in the super block's s_fs_info.
 */
struct binderfs_info {
struct ipc_namespace *ipc_ns;
struct dentry *control_dentry;
kuid_t root_uid;
kgid_t root_gid;
struct binderfs_mount_opts mount_opts;
int device_count;
struct dentry *proc_log_dir;
};
extern const struct file_operations binder_fops;
extern char *binder_devices_param;
/*
 * binderfs entry points used by binder.c. With CONFIG_ANDROID_BINDERFS
 * disabled they collapse to inline stubs so callers need no #ifdefs.
 */
#ifdef CONFIG_ANDROID_BINDERFS
extern bool is_binderfs_device(const struct inode *inode);
extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name,
const struct file_operations *fops,
void *data);
extern void binderfs_remove_file(struct dentry *dentry);
#else
/* Without binderfs no inode can belong to a binderfs mount. */
static inline bool is_binderfs_device(const struct inode *inode)
{
return false;
}
/*
 * Stub returns NULL rather than an ERR_PTR: callers that test the result
 * with IS_ERR() see "no error" and end up holding a NULL dentry.
 */
static inline struct dentry *binderfs_create_file(struct dentry *dir,
const char *name,
const struct file_operations *fops,
void *data)
{
return NULL;
}
/* Nothing to remove when binderfs is compiled out. */
static inline void binderfs_remove_file(struct dentry *dentry) {}
#endif
/* Filesystem registration hook; a successful no-op when binderfs is disabled. */
#ifdef CONFIG_ANDROID_BINDERFS
extern int __init init_binderfs(void);
#else
static inline int __init init_binderfs(void)
{
return 0;
}
#endif
/*
 * seq_file show callbacks implemented in binder.c. Each
 * DEFINE_SHOW_ATTRIBUTE(name) below provides the matching name##_fops
 * (e.g. binder_stats_fops) that binderfs.c passes to binderfs_create_file().
 */
int binder_stats_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(binder_stats);
int binder_state_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(binder_state);
int binder_transactions_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(binder_transactions);
/* Reads the log to print from the seq_file's private pointer. */
int binder_transaction_log_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(binder_transaction_log);
/*
 * One record of a binder transaction log.
 *
 * NOTE(review): the meaning of the individual fields is not visible from this
 * header; see print_binder_transaction_log_entry() in binder.c for how they
 * are reported.
 */
struct binder_transaction_log_entry {
int debug_id;
int debug_id_done;
int call_type;
int from_proc;
int from_thread;
int target_handle;
int to_proc;
int to_thread;
int to_node;
int data_size;
int offsets_size;
int return_error_line;
uint32_t return_error;
uint32_t return_error_param;
const char *context_name;
};
/*
 * Fixed-size log holding up to 32 transaction entries.
 *
 * NOTE(review): @cur is read atomically by binder_transaction_log_show();
 * presumably @cur/@full implement a ring buffer position and wrap flag -
 * confirm against binder_transaction_log_add() in binder.c.
 */
struct binder_transaction_log {
atomic_t cur;
bool full;
struct binder_transaction_log_entry entry[32];
};
extern struct binder_transaction_log binder_transaction_log;
extern struct binder_transaction_log binder_transaction_log_failed;
#endif /* _LINUX_BINDER_INTERNAL_H */

790
drivers/android/binderfs.c Normal file
View file

@ -0,0 +1,790 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/compiler_types.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/ipc_namespace.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/namei.h>
#include <linux/magic.h>
#include <linux/major.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/radix-tree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock_types.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/user_namespace.h>
#include <linux/xarray.h>
#include <uapi/asm-generic/errno-base.h>
#include <uapi/linux/android/binder.h>
#include <uapi/linux/android/binderfs.h>
#include "binder_internal.h"
/* Inode number given to the root directory of a binderfs mount. */
#define FIRST_INODE 1
/* Inode number given to the binder-control device node. */
#define SECOND_INODE 2
/* Binder device inodes are numbered "minor + INODE_OFFSET". */
#define INODE_OFFSET 3
/* NOTE(review): not referenced in the code visible here - confirm it is still needed. */
#define INTSTRLEN 21
/* Upper bound handed to ida_alloc_max() and default/limit for "max=". */
#define BINDERFS_MAX_MINOR (1U << MINORBITS)
/*
 * Ensure that the initial ipc namespace always has devices available:
 * mounts in any other ipc namespace allocate minors up to this lower cap.
 */
#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4)
/* Device number whose major is used for every binderfs device node. */
static dev_t binderfs_dev;
/* Held around binderfs_minors allocations/frees and device_count updates. */
static DEFINE_MUTEX(binderfs_minors_mutex);
/* Allocator for minor numbers, shared by all binderfs mounts. */
static DEFINE_IDA(binderfs_minors);
/* Token ids returned by match_token() for the mount options below. */
enum {
Opt_max,
Opt_stats_mode,
Opt_err
};
/* Values stored in binderfs_mount_opts.stats_mode. */
enum binderfs_stats_mode {
STATS_NONE,
STATS_GLOBAL,
};
/*
 * Recognised mount options: "max=<int>" and "stats=<string>". Anything else
 * maps to Opt_err.
 */
static const match_table_t tokens = {
{ Opt_max, "max=%d" },
{ Opt_stats_mode, "stats=%s" },
{ Opt_err, NULL }
};
static inline struct binderfs_info *BINDERFS_I(const struct inode *inode)
{
return inode->i_sb->s_fs_info;
}
/**
 * is_binderfs_device - check whether an inode lives on a binderfs mount
 * @inode: inode to test
 *
 * Return: true if the super block of @inode carries BINDERFS_SUPER_MAGIC,
 *         false otherwise.
 */
bool is_binderfs_device(const struct inode *inode)
{
	return inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC;
}
/**
 * binderfs_binder_device_create - allocate inode from super block of a
 *                                 binderfs mount
 * @ref_inode: inode from which the super block will be taken
 * @userp:     buffer to copy information about new device for userspace to;
 *             may be NULL, in which case nothing is copied back
 * @req:       struct binderfs_device as copied from userspace; its name is
 *             forcibly NUL-terminated and its major/minor fields are filled
 *             in by this function
 *
 * This function allocates a new binder_device and reserves a new minor
 * number for it.
 * Minor numbers are limited and tracked globally in binderfs_minors. The
 * function will stash a struct binder_device for the specific binder
 * device in i_private of the inode.
 * It will go on to allocate a new inode from the super block of the
 * filesystem mount, stash a struct binder_device in its i_private field
 * and attach a dentry to that inode.
 *
 * Return: 0 on success, negative errno on failure (-ENOSPC when the mount's
 *         "max" limit is reached, -EEXIST when the name is already taken)
 */
static int binderfs_binder_device_create(struct inode *ref_inode,
struct binderfs_device __user *userp,
struct binderfs_device *req)
{
int minor, ret;
struct dentry *dentry, *root;
struct binder_device *device;
char *name = NULL;
size_t name_len;
struct inode *inode = NULL;
struct super_block *sb = ref_inode->i_sb;
struct binderfs_info *info = sb->s_fs_info;
/* Only mounts in the initial ipc namespace may use the reserved minors. */
#if defined(CONFIG_IPC_NS)
bool use_reserve = (info->ipc_ns == &init_ipc_ns);
#else
bool use_reserve = true;
#endif
/*
 * Reserve new minor number for the new device. device_count is bumped
 * first so the per-mount limit is checked under the same lock; it is
 * rolled back below (or at the err label) on any failure.
 */
mutex_lock(&binderfs_minors_mutex);
if (++info->device_count <= info->mount_opts.max)
minor = ida_alloc_max(&binderfs_minors,
use_reserve ? BINDERFS_MAX_MINOR :
BINDERFS_MAX_MINOR_CAPPED,
GFP_KERNEL);
else
minor = -ENOSPC;
if (minor < 0) {
--info->device_count;
mutex_unlock(&binderfs_minors_mutex);
return minor;
}
mutex_unlock(&binderfs_minors_mutex);
/* Default error for the allocation failures that follow. */
ret = -ENOMEM;
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device)
goto err;
inode = new_inode(sb);
if (!inode)
goto err;
inode->i_ino = minor + INODE_OFFSET;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
init_special_inode(inode, S_IFCHR | 0600,
MKDEV(MAJOR(binderfs_dev), minor));
inode->i_fop = &binder_fops;
inode->i_uid = info->root_uid;
inode->i_gid = info->root_gid;
req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */
name_len = strlen(req->name);
/* Make sure to include terminating NUL byte */
name = kmemdup(req->name, name_len + 1, GFP_KERNEL);
if (!name)
goto err;
device->binderfs_inode = inode;
device->context.binder_context_mgr_uid = INVALID_UID;
device->context.name = name;
device->miscdev.name = name;
device->miscdev.minor = minor;
mutex_init(&device->context.context_mgr_node_lock);
/* Report the assigned device number back to the caller. */
req->major = MAJOR(binderfs_dev);
req->minor = minor;
if (userp && copy_to_user(userp, req, sizeof(*req))) {
ret = -EFAULT;
goto err;
}
root = sb->s_root;
inode_lock(d_inode(root));
/* look it up */
dentry = lookup_one_len(name, root, name_len);
if (IS_ERR(dentry)) {
inode_unlock(d_inode(root));
ret = PTR_ERR(dentry);
goto err;
}
if (d_really_is_positive(dentry)) {
/* already exists */
dput(dentry);
inode_unlock(d_inode(root));
ret = -EEXIST;
goto err;
}
/*
 * Hand ownership of the device to the inode only once nothing can fail
 * any more; from here on binderfs_evict_inode() releases it.
 */
inode->i_private = device;
d_instantiate(dentry, inode);
fsnotify_create(root->d_inode, dentry);
inode_unlock(d_inode(root));
return 0;
err:
/*
 * i_private is still NULL on every path that reaches this label, so the
 * iput() below does not free the device or the minor a second time via
 * binderfs_evict_inode().
 */
kfree(name);
kfree(device);
mutex_lock(&binderfs_minors_mutex);
--info->device_count;
ida_free(&binderfs_minors, minor);
mutex_unlock(&binderfs_minors_mutex);
iput(inode);
return ret;
}
/**
* binderfs_ctl_ioctl - handle binder device node allocation requests
*
* The request handler for the binder-control device. All requests operate on
* the binderfs mount the binder-control device resides in:
* - BINDER_CTL_ADD
* Allocate a new binder device.
*
* Return: 0 on success, negative errno on failure
*/
static long binder_ctl_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
int ret = -EINVAL;
struct inode *inode = file_inode(file);
struct binderfs_device __user *device = (struct binderfs_device __user *)arg;
struct binderfs_device device_req;
switch (cmd) {
case BINDER_CTL_ADD:
ret = copy_from_user(&device_req, device, sizeof(device_req));
if (ret) {
ret = -EFAULT;
break;
}
ret = binderfs_binder_device_create(inode, device, &device_req);
break;
default:
break;
}
return ret;
}
/*
 * Super-block ->evict_inode hook. Character-device inodes that carry a
 * binder_device in i_private give their minor back to the allocator, drop the
 * mount's device count and free the device; all other inodes are only cleared.
 */
static void binderfs_evict_inode(struct inode *inode)
{
	struct binderfs_info *sbi = BINDERFS_I(inode);
	struct binder_device *dev = inode->i_private;

	clear_inode(inode);

	if (S_ISCHR(inode->i_mode) && dev) {
		mutex_lock(&binderfs_minors_mutex);
		--sbi->device_count;
		ida_free(&binderfs_minors, dev->miscdev.minor);
		mutex_unlock(&binderfs_minors_mutex);

		kfree(dev->context.name);
		kfree(dev);
	}
}
/**
* binderfs_parse_mount_opts - parse binderfs mount options
* @data: options to set (can be NULL in which case defaults are used)
*/
static int binderfs_parse_mount_opts(char *data,
struct binderfs_mount_opts *opts)
{
char *p, *stats;
opts->max = BINDERFS_MAX_MINOR;
opts->stats_mode = STATS_NONE;
while ((p = strsep(&data, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
int token;
int max_devices;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_max:
if (match_int(&args[0], &max_devices) ||
(max_devices < 0 ||
(max_devices > BINDERFS_MAX_MINOR)))
return -EINVAL;
opts->max = max_devices;
break;
case Opt_stats_mode:
if (!capable(CAP_SYS_ADMIN))
return -EINVAL;
stats = match_strdup(&args[0]);
if (!stats)
return -ENOMEM;
if (strcmp(stats, "global") != 0) {
kfree(stats);
return -EINVAL;
}
opts->stats_mode = STATS_GLOBAL;
kfree(stats);
break;
default:
pr_err("Invalid mount options\n");
return -EINVAL;
}
}
return 0;
}
static int binderfs_remount(struct super_block *sb, int *flags, char *data)
{
int prev_stats_mode, ret;
struct binderfs_info *info = sb->s_fs_info;
prev_stats_mode = info->mount_opts.stats_mode;
ret = binderfs_parse_mount_opts(data, &info->mount_opts);
if (ret)
return ret;
if (prev_stats_mode != info->mount_opts.stats_mode) {
pr_err("Binderfs stats mode cannot be changed during a remount\n");
info->mount_opts.stats_mode = prev_stats_mode;
return -EINVAL;
}
return 0;
}
static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root)
{
struct binderfs_info *info;
info = root->d_sb->s_fs_info;
if (info->mount_opts.max <= BINDERFS_MAX_MINOR)
seq_printf(seq, ",max=%d", info->mount_opts.max);
if (info->mount_opts.stats_mode == STATS_GLOBAL)
seq_printf(seq, ",stats=global");
return 0;
}
/*
 * Super-block operations for binderfs: device teardown on inode eviction,
 * remount option handling, option display and generic statfs.
 */
static const struct super_operations binderfs_super_ops = {
.evict_inode = binderfs_evict_inode,
.remount_fs = binderfs_remount,
.show_options = binderfs_show_mount_opts,
.statfs = simple_statfs,
};
static inline bool is_binderfs_control_device(const struct dentry *dentry)
{
struct binderfs_info *info = dentry->d_sb->s_fs_info;
return info->control_dentry == dentry;
}
/*
 * Directory ->rename hook. The binder-control node may neither be renamed nor
 * be replaced by a rename (-EPERM); everything else goes to simple_rename().
 */
static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			   struct inode *new_dir, struct dentry *new_dentry,
			   unsigned int flags)
{
	if (is_binderfs_control_device(old_dentry))
		return -EPERM;

	if (is_binderfs_control_device(new_dentry))
		return -EPERM;

	return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
static int binderfs_unlink(struct inode *dir, struct dentry *dentry)
{
if (is_binderfs_control_device(dentry))
return -EPERM;
return simple_unlink(dir, dentry);
}
/*
 * File operations of the binder-control node: a non-seekable file whose only
 * real operation is the device-allocation ioctl (also used for compat callers).
 */
static const struct file_operations binder_ctl_fops = {
.owner = THIS_MODULE,
.open = nonseekable_open,
.unlocked_ioctl = binder_ctl_ioctl,
.compat_ioctl = binder_ctl_ioctl,
.llseek = noop_llseek,
};
/**
 * binderfs_binder_ctl_create - create a new binder-control device
 * @sb: super block of the binderfs mount
 *
 * This function creates a new binder-control device node in the binderfs mount
 * referred to by @sb. If the mount already has a binder-control node nothing
 * is allocated and 0 is returned.
 *
 * Return: 0 on success, negative errno on failure
 */
static int binderfs_binder_ctl_create(struct super_block *sb)
{
	int minor, ret;
	struct dentry *dentry;
	struct binder_device *device;
	struct inode *inode = NULL;
	struct dentry *root = sb->s_root;
	struct binderfs_info *info = sb->s_fs_info;
#if defined(CONFIG_IPC_NS)
	bool use_reserve = (info->ipc_ns == &init_ipc_ns);
#else
	bool use_reserve = true;
#endif

	/* If we have already created a binder-control node, return. */
	if (info->control_dentry)
		return 0;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return -ENOMEM;

	ret = -ENOMEM;
	inode = new_inode(sb);
	if (!inode)
		goto out;

	/* Reserve a new minor number for the new device. */
	mutex_lock(&binderfs_minors_mutex);
	minor = ida_alloc_max(&binderfs_minors,
			      use_reserve ? BINDERFS_MAX_MINOR :
					    BINDERFS_MAX_MINOR_CAPPED,
			      GFP_KERNEL);
	mutex_unlock(&binderfs_minors_mutex);
	if (minor < 0) {
		ret = minor;
		goto out;
	}

	inode->i_ino = SECOND_INODE;
	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
	init_special_inode(inode, S_IFCHR | 0600,
			   MKDEV(MAJOR(binderfs_dev), minor));
	inode->i_fop = &binder_ctl_fops;
	inode->i_uid = info->root_uid;
	inode->i_gid = info->root_gid;

	device->binderfs_inode = inode;
	device->miscdev.minor = minor;

	dentry = d_alloc_name(root, "binder-control");
	if (!dentry)
		goto out_free_minor;

	/* From here on binderfs_evict_inode() owns the device and the minor. */
	inode->i_private = device;
	info->control_dentry = dentry;
	d_add(dentry, inode);

	return 0;

out_free_minor:
	/*
	 * i_private is still NULL, so the iput() below will not release the
	 * minor through binderfs_evict_inode(); give it back here.
	 */
	mutex_lock(&binderfs_minors_mutex);
	ida_free(&binderfs_minors, minor);
	mutex_unlock(&binderfs_minors_mutex);
out:
	kfree(device);
	iput(inode);

	return ret;
}
/*
 * Inode operations of the binderfs root directory: generic lookup plus
 * rename/unlink wrappers that protect the binder-control node.
 */
static const struct inode_operations binderfs_dir_inode_operations = {
.lookup = simple_lookup,
.rename = binderfs_rename,
.unlink = binderfs_unlink,
};
static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
{
struct inode *ret;
ret = new_inode(sb);
if (ret) {
ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
ret->i_mode = mode;
ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
}
return ret;
}
/*
 * Look up @name under @parent and return its (negative) dentry.
 * Returns the lookup's ERR_PTR on failure, or ERR_PTR(-EEXIST) if an entry
 * with that name already exists.
 */
static struct dentry *binderfs_create_dentry(struct dentry *parent,
					     const char *name)
{
	struct dentry *found = lookup_one_len(name, parent, strlen(name));

	/* Return error if the file/dir already exists. */
	if (!IS_ERR(found) && d_really_is_positive(found)) {
		dput(found);
		found = ERR_PTR(-EEXIST);
	}

	return found;
}
/**
 * binderfs_remove_file - remove a file created with binderfs_create_file()
 * @dentry: dentry of the file to remove
 *
 * Unlinks and deletes @dentry under its parent directory's inode lock. Does
 * nothing if the dentry is no longer positive.
 */
void binderfs_remove_file(struct dentry *dentry)
{
	struct inode *dir = d_inode(dentry->d_parent);

	inode_lock(dir);
	if (!simple_positive(dentry))
		goto unlock;

	dget(dentry);
	simple_unlink(dir, dentry);
	d_delete(dentry);
	dput(dentry);
unlock:
	inode_unlock(dir);
}
/**
 * binderfs_create_file - create a read-only regular file in a binderfs directory
 * @parent: directory dentry to create the file in
 * @name:   name of the new file
 * @fops:   file operations to install on the new inode
 * @data:   private data stored in the new inode's i_private
 *
 * Return: the new dentry on success; ERR_PTR(-EEXIST) if @name already
 *         exists, ERR_PTR(-ENOMEM) if no inode could be allocated, or the
 *         error returned by the name lookup.
 */
struct dentry *binderfs_create_file(struct dentry *parent, const char *name,
				    const struct file_operations *fops,
				    void *data)
{
	struct inode *dir = d_inode(parent);
	struct dentry *entry;
	struct inode *node;

	inode_lock(dir);

	entry = binderfs_create_dentry(parent, name);
	if (!IS_ERR(entry)) {
		node = binderfs_make_inode(dir->i_sb, S_IFREG | 0444);
		if (node) {
			node->i_fop = fops;
			node->i_private = data;
			d_instantiate(entry, node);
			fsnotify_create(dir, entry);
		} else {
			dput(entry);
			entry = ERR_PTR(-ENOMEM);
		}
	}

	inode_unlock(dir);

	return entry;
}
/*
 * Create the directory @name (mode 0755) under @parent and account for it in
 * the parent's link count.
 * Returns the new dentry, ERR_PTR(-EEXIST) if @name already exists,
 * ERR_PTR(-ENOMEM) if no inode could be allocated, or the lookup error.
 */
static struct dentry *binderfs_create_dir(struct dentry *parent,
					  const char *name)
{
	struct inode *dir = d_inode(parent);
	struct dentry *entry;
	struct inode *node;

	inode_lock(dir);

	entry = binderfs_create_dentry(parent, name);
	if (!IS_ERR(entry)) {
		node = binderfs_make_inode(dir->i_sb, S_IFDIR | 0755);
		if (node) {
			node->i_fop = &simple_dir_operations;
			node->i_op = &simple_dir_inode_operations;
			set_nlink(node, 2);
			d_instantiate(entry, node);
			inc_nlink(dir);
			fsnotify_mkdir(dir, entry);
		} else {
			dput(entry);
			entry = ERR_PTR(-ENOMEM);
		}
	}

	inode_unlock(dir);

	return entry;
}
static int init_binder_logs(struct super_block *sb)
{
struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
struct binderfs_info *info;
int ret = 0;
binder_logs_root_dir = binderfs_create_dir(sb->s_root,
"binder_logs");
if (IS_ERR(binder_logs_root_dir)) {
ret = PTR_ERR(binder_logs_root_dir);
goto out;
}
dentry = binderfs_create_file(binder_logs_root_dir, "stats",
&binder_stats_fops, NULL);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out;
}
dentry = binderfs_create_file(binder_logs_root_dir, "state",
&binder_state_fops, NULL);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out;
}
dentry = binderfs_create_file(binder_logs_root_dir, "transactions",
&binder_transactions_fops, NULL);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out;
}
dentry = binderfs_create_file(binder_logs_root_dir,
"transaction_log",
&binder_transaction_log_fops,
&binder_transaction_log);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out;
}
dentry = binderfs_create_file(binder_logs_root_dir,
"failed_transaction_log",
&binder_transaction_log_fops,
&binder_transaction_log_failed);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out;
}
proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc");
if (IS_ERR(proc_log_dir)) {
ret = PTR_ERR(proc_log_dir);
goto out;
}
info = sb->s_fs_info;
info->proc_log_dir = proc_log_dir;
out:
return ret;
}
static int binderfs_fill_super(struct super_block *sb, void *data, int silent)
{
int ret;
struct binderfs_info *info;
struct inode *inode = NULL;
struct binderfs_device device_info = { 0 };
const char *name;
size_t len;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
/*
* The binderfs filesystem can be mounted by userns root in a
* non-initial userns. By default such mounts have the SB_I_NODEV flag
* set in s_iflags to prevent security issues where userns root can
* just create random device nodes via mknod() since it owns the
* filesystem mount. But binderfs does not allow to create any files
* including devices nodes. The only way to create binder devices nodes
* is through the binder-control device which userns root is explicitly
* allowed to do. So removing the SB_I_NODEV flag from s_iflags is both
* necessary and safe.
*/
sb->s_iflags &= ~SB_I_NODEV;
sb->s_iflags |= SB_I_NOEXEC;
sb->s_magic = BINDERFS_SUPER_MAGIC;
sb->s_op = &binderfs_super_ops;
sb->s_time_gran = 1;
sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL);
if (!sb->s_fs_info)
return -ENOMEM;
info = sb->s_fs_info;
info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
ret = binderfs_parse_mount_opts(data, &info->mount_opts);
if (ret)
return ret;
info->root_gid = make_kgid(sb->s_user_ns, 0);
if (!gid_valid(info->root_gid))
info->root_gid = GLOBAL_ROOT_GID;
info->root_uid = make_kuid(sb->s_user_ns, 0);
if (!uid_valid(info->root_uid))
info->root_uid = GLOBAL_ROOT_UID;
inode = new_inode(sb);
if (!inode)
return -ENOMEM;
inode->i_ino = FIRST_INODE;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | 0755;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_op = &binderfs_dir_inode_operations;
set_nlink(inode, 2);
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
ret = binderfs_binder_ctl_create(sb);
if (ret)
return ret;
name = binder_devices_param;
for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
strscpy(device_info.name, name, len + 1);
ret = binderfs_binder_device_create(inode, NULL, &device_info);
if (ret)
return ret;
name += len;
if (*name == ',')
name++;
}
if (info->mount_opts.stats_mode == STATS_GLOBAL)
return init_binder_logs(sb);
return 0;
}
/*
 * ->mount callback of the "binder" filesystem type.
 *
 * Hands the request to mount_nodev() with binderfs_fill_super() as the
 * super block initialiser. @dev_name is not used.
 */
static struct dentry *binderfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
return mount_nodev(fs_type, flags, data, binderfs_fill_super);
}
/*
 * ->kill_sb callback: tear down the super block and release the
 * per-instance binderfs_info together with the IPC namespace reference it
 * holds.
 */
static void binderfs_kill_super(struct super_block *sb)
{
	/* Fetch the private data before the super block is torn down. */
	struct binderfs_info *info = sb->s_fs_info;

	kill_litter_super(sb);

	/* Nothing was attached, e.g. the allocation in fill_super failed. */
	if (!info)
		return;

	if (info->ipc_ns)
		put_ipc_ns(info->ipc_ns);

	kfree(info);
}
/*
 * Filesystem type registered by init_binderfs() under the name "binder".
 * FS_USERNS_MOUNT is set; binderfs_fill_super() documents that the
 * filesystem can be mounted by userns root in a non-initial user namespace.
 */
static struct file_system_type binder_fs_type = {
.name = "binder",
.mount = binderfs_mount,
.kill_sb = binderfs_kill_super,
.fs_flags = FS_USERNS_MOUNT,
};
/*
 * Register binderfs.
 *
 * Checks that every comma-separated default device name in
 * binder_devices_param fits into BINDERFS_MAX_NAME characters, reserves a
 * character device region of BINDERFS_MAX_MINOR minors and registers the
 * "binder" filesystem type.
 *
 * Returns 0 on success, -E2BIG for an over-long device name, or the error
 * returned by alloc_chrdev_region() / register_filesystem().
 */
int __init init_binderfs(void)
{
	const char *name = binder_devices_param;
	size_t len;
	int ret;

	/* Reject default device names that are too long. */
	while ((len = strcspn(name, ",")) > 0) {
		if (len > BINDERFS_MAX_NAME)
			return -E2BIG;

		name += len;
		if (*name == ',')
			name++;
	}

	/* Reserve a major number (and minor range) for binderfs. */
	ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR,
				  "binder");
	if (ret)
		return ret;

	/* Give the device region back if the filesystem cannot be registered. */
	ret = register_filesystem(&binder_fs_type);
	if (ret)
		unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR);

	return ret;
}

View file

@ -44,6 +44,10 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
#endif
/* Device links support. */
static LIST_HEAD(wait_for_suppliers);
static DEFINE_MUTEX(wfs_lock);
static LIST_HEAD(deferred_sync);
static unsigned int defer_sync_state_count = 1;
#ifdef CONFIG_SRCU
static DEFINE_MUTEX(device_links_lock);
@ -114,6 +118,9 @@ static int device_is_dependent(struct device *dev, void *target)
return ret;
list_for_each_entry(link, &dev->links.consumers, s_node) {
if (link->flags == DL_FLAG_SYNC_STATE_ONLY)
continue;
if (link->consumer == target)
return 1;
@ -139,8 +146,11 @@ static int device_reorder_to_tail(struct device *dev, void *not_used)
device_pm_move_last(dev);
device_for_each_child(dev, NULL, device_reorder_to_tail);
list_for_each_entry(link, &dev->links.consumers, s_node)
list_for_each_entry(link, &dev->links.consumers, s_node) {
if (link->flags == DL_FLAG_SYNC_STATE_ONLY)
continue;
device_reorder_to_tail(link->consumer, NULL);
}
return 0;
}
@ -199,6 +209,8 @@ struct device_link *device_link_add(struct device *consumer,
struct device_link *link;
if (!consumer || !supplier ||
(flags & DL_FLAG_SYNC_STATE_ONLY &&
flags != DL_FLAG_SYNC_STATE_ONLY) ||
((flags & DL_FLAG_STATELESS) &&
(flags & DL_FLAG_AUTOREMOVE_CONSUMER)))
return NULL;
@ -208,11 +220,14 @@ struct device_link *device_link_add(struct device *consumer,
/*
* If the supplier has not been fully registered yet or there is a
* reverse dependency between the consumer and the supplier already in
* the graph, return NULL.
* reverse (non-SYNC_STATE_ONLY) dependency between the consumer and
* the supplier already in the graph, return NULL. If the link is a
* SYNC_STATE_ONLY link, we don't check for reverse dependencies
* because it only affects sync_state() callbacks.
*/
if (!device_pm_initialized(supplier)
|| device_is_dependent(consumer, supplier)) {
|| (!(flags & DL_FLAG_SYNC_STATE_ONLY) &&
device_is_dependent(consumer, supplier))) {
link = NULL;
goto out;
}
@ -220,6 +235,11 @@ struct device_link *device_link_add(struct device *consumer,
list_for_each_entry(link, &supplier->links.consumers, s_node)
if (link->consumer == consumer) {
kref_get(&link->kref);
if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
!(flags & DL_FLAG_SYNC_STATE_ONLY)) {
link->flags &= ~DL_FLAG_SYNC_STATE_ONLY;
goto reorder;
}
goto out;
}
@ -290,6 +310,9 @@ struct device_link *device_link_add(struct device *consumer,
}
}
if (flags & DL_FLAG_SYNC_STATE_ONLY)
goto out;
reorder:
/*
* Move the consumer and all of the devices depending on it to the end
* of dpm_list and the devices_kset list.
@ -311,6 +334,70 @@ struct device_link *device_link_add(struct device *consumer,
}
EXPORT_SYMBOL_GPL(device_link_add);
/**
 * device_link_wait_for_supplier - Add device to wait_for_suppliers list
 * @consumer: Consumer device
 * @need_for_probe: true if the missing suppliers must hold off probing of
 *		    @consumer, false if they are optional for probing
 *
 * Marks the @consumer device as waiting for suppliers to become available by
 * adding it to the wait_for_suppliers list. While the device is on that list
 * and @need_for_probe is true, device_links_check_suppliers() returns
 * -EPROBE_DEFER for it.
 *
 * The caller is responsible for adding the links to the supplier devices once
 * they are available and removing the @consumer device from the
 * wait_for_suppliers list once links to all the suppliers have been created.
 *
 * This function is NOT meant to be called from the probe function of the
 * consumer but rather from code that creates/adds the consumer device.
 */
static void device_link_wait_for_supplier(struct device *consumer,
					  bool need_for_probe)
{
	struct list_head *entry = &consumer->links.needs_suppliers;

	/* Both the list membership and the flag are protected by wfs_lock. */
	mutex_lock(&wfs_lock);
	consumer->links.need_for_probe = need_for_probe;
	list_add_tail(entry, &wait_for_suppliers);
	mutex_unlock(&wfs_lock);
}
/*
 * Put @consumer on the wait_for_suppliers list with need_for_probe set, so
 * that device_links_check_suppliers() defers its probe while it is listed.
 */
static void device_link_wait_for_mandatory_supplier(struct device *consumer)
{
device_link_wait_for_supplier(consumer, true);
}
/*
 * Put @consumer on the wait_for_suppliers list with need_for_probe cleared:
 * the device is tracked as waiting, but device_links_check_suppliers() does
 * not defer its probe because of that.
 */
static void device_link_wait_for_optional_supplier(struct device *consumer)
{
device_link_wait_for_supplier(consumer, false);
}
/**
* device_link_add_missing_supplier_links - Add links from consumer devices to
* supplier devices, leaving any
* consumer with inactive suppliers on
* the wait_for_suppliers list
*
* Loops through all consumers waiting on suppliers and tries to add all their
* supplier links. If that succeeds, the consumer device is removed from
* wait_for_suppliers list. Otherwise, they are left in the wait_for_suppliers
* list. Devices left on the wait_for_suppliers list will not be probed.
*
* The fwnode add_links callback is expected to return 0 if it has found and
* added all the supplier links for the consumer device. It should return an
* error if it isn't able to do so.
*
* The caller of device_link_wait_for_supplier() is expected to call this once
* it's aware of potential suppliers becoming available.
*/
static void device_link_add_missing_supplier_links(void)
{
struct device *dev, *tmp;
mutex_lock(&wfs_lock);
list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
links.needs_suppliers)
if (!fwnode_call_int_op(dev->fwnode, add_links, dev))
list_del_init(&dev->links.needs_suppliers);
mutex_unlock(&wfs_lock);
}
static void device_link_free(struct device_link *link)
{
put_device(link->consumer);
@ -434,10 +521,23 @@ int device_links_check_suppliers(struct device *dev)
struct device_link *link;
int ret = 0;
/*
* Device waiting for supplier to become available is not allowed to
* probe.
*/
mutex_lock(&wfs_lock);
if (!list_empty(&dev->links.needs_suppliers) &&
dev->links.need_for_probe) {
mutex_unlock(&wfs_lock);
return -EPROBE_DEFER;
}
mutex_unlock(&wfs_lock);
device_links_write_lock();
list_for_each_entry(link, &dev->links.suppliers, c_node) {
if (link->flags & DL_FLAG_STATELESS)
if (link->flags & DL_FLAG_STATELESS ||
link->flags & DL_FLAG_SYNC_STATE_ONLY)
continue;
if (link->status != DL_STATE_AVAILABLE) {
@ -453,6 +553,128 @@ int device_links_check_suppliers(struct device *dev)
return ret;
}
/**
* __device_links_queue_sync_state - Queue a device for sync_state() callback
* @dev: Device to call sync_state() on
* @list: List head to queue the @dev on
*
* Queues a device for a sync_state() callback when the device links write lock
* isn't held. This allows the sync_state() execution flow to use device links
* APIs. The caller must ensure this function is called with
* device_links_write_lock() held.
*
* This function does a get_device() to make sure the device is not freed while
* on this list.
*
* So the caller must also ensure that device_links_flush_sync_list() is called
* as soon as the caller releases device_links_write_lock(). This is necessary
* to make sure the sync_state() is called in a timely fashion and the
* put_device() is called on this device.
*/
static void __device_links_queue_sync_state(struct device *dev,
					    struct list_head *list)
{
	struct device_link *consumer_link;

	/* sync_state() has already been queued or delivered for this device. */
	if (dev->state_synced)
		return;

	/*
	 * Every consumer link that is not stateless has to be active before
	 * the device may be queued.
	 */
	list_for_each_entry(consumer_link, &dev->links.consumers, s_node) {
		if (!(consumer_link->flags & DL_FLAG_STATELESS) &&
		    consumer_link->status != DL_STATE_ACTIVE)
			return;
	}

	/*
	 * Mark the device as synced right away so it cannot be queued a
	 * second time if further consumers are added and probed before the
	 * list is flushed.
	 */
	dev->state_synced = true;

	/* The device is unexpectedly still on a defer_sync list. */
	if (WARN_ON(!list_empty(&dev->links.defer_sync)))
		return;

	/* Pin the device for as long as it sits on @list. */
	get_device(dev);
	list_add_tail(&dev->links.defer_sync, list);
}
/**
 * device_links_flush_sync_list - Call sync_state() on a list of devices
 * @list: List of devices to call sync_state() on
 *
 * Calls sync_state() on all the devices that have been queued for it. This
 * function is used in conjunction with __device_links_queue_sync_state().
 *
 * For each device the bus sync_state() callback is used if there is one;
 * otherwise the sync_state() callback of the bound driver is used, if any.
 * Each device is removed from @list, the callback runs with the device lock
 * held, and the reference taken when the device was queued is dropped.
 */
static void device_links_flush_sync_list(struct list_head *list)
{
	struct device *dev, *tmp;

	list_for_each_entry_safe(dev, tmp, list, links.defer_sync) {
		list_del_init(&dev->links.defer_sync);

		device_lock(dev);

		/*
		 * A device is not required to belong to a bus, so dev->bus
		 * must be checked before looking at its callbacks.
		 */
		if (dev->bus && dev->bus->sync_state)
			dev->bus->sync_state(dev);
		else if (dev->driver && dev->driver->sync_state)
			dev->driver->sync_state(dev);

		device_unlock(dev);

		/* Pairs with get_device() in __device_links_queue_sync_state(). */
		put_device(dev);
	}
}
/*
 * Increment defer_sync_state_count under the device links write lock.
 * While the count is non-zero, device_links_driver_bound() puts suppliers
 * on the deferred_sync list instead of queuing their sync_state() callback.
 * Each call is undone by device_links_supplier_sync_state_resume().
 */
void device_links_supplier_sync_state_pause(void)
{
device_links_write_lock();
defer_sync_state_count++;
device_links_write_unlock();
}
/*
 * Undo one device_links_supplier_sync_state_pause().
 *
 * When the pause count drops to zero, every device parked on the
 * deferred_sync list is re-evaluated and, if eligible, queued for its
 * sync_state() callback. The callbacks run after the device links write
 * lock has been released. A resume without a matching pause only triggers a
 * warning.
 */
void device_links_supplier_sync_state_resume(void)
{
	struct device *sup, *next;
	LIST_HEAD(sync_list);

	device_links_write_lock();

	if (!defer_sync_state_count) {
		WARN(true, "Unmatched sync_state pause/resume!");
	} else if (--defer_sync_state_count == 0) {
		list_for_each_entry_safe(sup, next, &deferred_sync,
					 links.defer_sync) {
			/*
			 * links.defer_sync is the list node for both
			 * deferred_sync and sync_list, so unlink the device
			 * first and only then try to queue it.
			 */
			list_del_init(&sup->links.defer_sync);
			__device_links_queue_sync_state(sup, &sync_list);
		}
	}

	device_links_write_unlock();

	/* sync_list is empty unless the last pause was just lifted. */
	device_links_flush_sync_list(&sync_list);
}
/*
 * Late initcall that performs one sync_state resume. defer_sync_state_count
 * is initialised to 1, so this call balances that initial value.
 */
static int sync_state_resume_initcall(void)
{
device_links_supplier_sync_state_resume();
return 0;
}
late_initcall(sync_state_resume_initcall);
/*
 * Park @sup on the deferred_sync list unless its defer_sync node is already
 * linked into a list.
 */
static void __device_links_supplier_defer_sync(struct device *sup)
{
	/* Already queued somewhere; adding it again would corrupt the list. */
	if (!list_empty(&sup->links.defer_sync))
		return;

	list_add_tail(&sup->links.defer_sync, &deferred_sync);
}
/**
* device_links_driver_bound - Update device links after probing its driver.
* @dev: Device to update the links for.
@ -467,6 +689,16 @@ int device_links_check_suppliers(struct device *dev)
void device_links_driver_bound(struct device *dev)
{
struct device_link *link;
LIST_HEAD(sync_list);
/*
* If a device probes successfully, it's expected to have created all
* the device links it needs to or make new device links as it needs
* them. So, it no longer needs to wait on any suppliers.
*/
mutex_lock(&wfs_lock);
list_del_init(&dev->links.needs_suppliers);
mutex_unlock(&wfs_lock);
device_links_write_lock();
@ -484,11 +716,19 @@ void device_links_driver_bound(struct device *dev)
WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
WRITE_ONCE(link->status, DL_STATE_ACTIVE);
if (defer_sync_state_count)
__device_links_supplier_defer_sync(link->supplier);
else
__device_links_queue_sync_state(link->supplier,
&sync_list);
}
dev->links.status = DL_DEV_DRIVER_BOUND;
device_links_write_unlock();
device_links_flush_sync_list(&sync_list);
}
/**
@ -562,6 +802,7 @@ void device_links_driver_cleanup(struct device *dev)
WRITE_ONCE(link->status, DL_STATE_DORMANT);
}
list_del_init(&dev->links.defer_sync);
__device_links_no_driver(dev);
device_links_write_unlock();
@ -631,7 +872,8 @@ void device_links_unbind_consumers(struct device *dev)
list_for_each_entry(link, &dev->links.consumers, s_node) {
enum device_link_state status;
if (link->flags & DL_FLAG_STATELESS)
if (link->flags & DL_FLAG_STATELESS ||
link->flags & DL_FLAG_SYNC_STATE_ONLY)
continue;
status = link->status;
@ -667,6 +909,10 @@ static void device_links_purge(struct device *dev)
{
struct device_link *link, *ln;
mutex_lock(&wfs_lock);
list_del(&dev->links.needs_suppliers);
mutex_unlock(&wfs_lock);
/*
* Delete all of the remaining links from this device to any other
* devices (either consumers or suppliers).
@ -1507,6 +1753,8 @@ void device_initialize(struct device *dev)
#endif
INIT_LIST_HEAD(&dev->links.consumers);
INIT_LIST_HEAD(&dev->links.suppliers);
INIT_LIST_HEAD(&dev->links.needs_suppliers);
INIT_LIST_HEAD(&dev->links.defer_sync);
dev->links.status = DL_DEV_NO_DRIVER;
}
EXPORT_SYMBOL_GPL(device_initialize);
@ -1890,7 +2138,7 @@ int device_add(struct device *dev)
struct device *parent;
struct kobject *kobj;
struct class_interface *class_intf;
int error = -EINVAL;
int error = -EINVAL, fw_ret;
struct kobject *glue_dir = NULL;
dev = get_device(dev);
@ -1987,6 +2235,32 @@ int device_add(struct device *dev)
BUS_NOTIFY_ADD_DEVICE, dev);
kobject_uevent(&dev->kobj, KOBJ_ADD);
if (dev->fwnode && !dev->fwnode->dev)
dev->fwnode->dev = dev;
/*
* Check if any of the other devices (consumers) have been waiting for
* this device (supplier) to be added so that they can create a device
* link to it.
*
* This needs to happen after device_pm_add() because device_link_add()
* requires the supplier be registered before it's called.
*
* But this also needs to happen before bus_probe_device() to make sure
* waiting consumers can link to it before the driver is bound to the
* device and the driver sync_state callback is called for this device.
*/
device_link_add_missing_supplier_links();
if (fwnode_has_op(dev->fwnode, add_links)) {
fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
if (fw_ret == -ENODEV)
device_link_wait_for_mandatory_supplier(dev);
else if (fw_ret)
device_link_wait_for_optional_supplier(dev);
}
bus_probe_device(dev);
if (parent)
klist_add_tail(&dev->p->knode_parent,
@ -2129,6 +2403,9 @@ void device_del(struct device *dev)
kill_device(dev);
device_unlock(dev);
if (dev->fwnode && dev->fwnode->dev == dev)
dev->fwnode->dev = NULL;
/* Notify clients of device removal. This call must come
* before dpm_sysfs_remove().
*/

View file

@ -517,6 +517,7 @@ __cpu_device_create(struct device *parent, void *drvdata,
dev->parent = parent;
dev->groups = groups;
dev->release = device_create_release;
device_set_pm_not_required(dev);
dev_set_drvdata(dev, drvdata);
retval = kobject_set_name_vargs(&dev->kobj, fmt, args);

View file

@ -150,5 +150,17 @@ config FW_LOADER_USER_HELPER_FALLBACK
If you are unsure about this, say N here.
config FW_CACHE
bool "Enable firmware caching during suspend"
depends on PM_SLEEP
default y if PM_SLEEP
help
Because firmware caching generates uevent messages that are sent
over a netlink socket, it can prevent suspend on many platforms.
It is also not always useful, so this option allows firmware caching
to be disabled on such platforms.
If unsure, say Y.
endif # FW_LOADER
endmenu

View file

@ -67,8 +67,11 @@ static ssize_t node_read_meminfo(struct device *dev,
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
struct sysinfo i;
unsigned long sreclaimable, sunreclaimable;
si_meminfo_node(&i, nid);
sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
n = sprintf(buf,
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
@ -114,10 +117,14 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
#ifdef CONFIG_SHADOW_CALL_STACK
"Node %d ShadowCallStack:%8lu kB\n"
#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d WritebackTmp: %8lu kB\n"
"Node %d KReclaimable: %8lu kB\n"
"Node %d Slab: %8lu kB\n"
"Node %d SReclaimable: %8lu kB\n"
"Node %d SUnreclaim: %8lu kB\n"
@ -134,24 +141,28 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
nid, K(sreclaimable +
node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
nid, K(sreclaimable + sunreclaimable),
nid, K(sreclaimable),
nid, K(sunreclaimable)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
,
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
HPAGE_PMD_NR));
#else
nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
HPAGE_PMD_NR)
#endif
);
n += hugetlb_report_node_meminfo(nid, buf + n);
return n;
}

View file

@ -124,6 +124,10 @@ void device_pm_unlock(void)
*/
void device_pm_add(struct device *dev)
{
/* Skip PM setup/initialization. */
if (device_pm_not_required(dev))
return;
pr_debug("PM: Adding info for %s:%s\n",
dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
device_pm_check_callbacks(dev);
@ -142,6 +146,9 @@ void device_pm_add(struct device *dev)
*/
void device_pm_remove(struct device *dev)
{
if (device_pm_not_required(dev))
return;
pr_debug("PM: Removing info for %s:%s\n",
dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
complete_all(&dev->power.completion);

View file

@ -648,6 +648,10 @@ int dpm_sysfs_add(struct device *dev)
{
int rc;
/* No need to create PM sysfs if explicitly disabled. */
if (device_pm_not_required(dev))
return 0;
rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
if (rc)
return rc;
@ -727,6 +731,8 @@ void rpm_sysfs_remove(struct device *dev)
void dpm_sysfs_remove(struct device *dev)
{
if (device_pm_not_required(dev))
return;
sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group);
dev_pm_qos_constraints_destroy(dev);
rpm_sysfs_remove(dev);

View file

@ -82,6 +82,8 @@ struct clk_core {
struct clk_core *new_child;
unsigned long flags;
bool orphan;
bool need_sync;
bool boot_enabled;
unsigned int enable_count;
unsigned int prepare_count;
unsigned int protect_count;
@ -1282,6 +1284,10 @@ static void clk_unprepare_unused_subtree(struct clk_core *core)
hlist_for_each_entry(child, &core->children, child_node)
clk_unprepare_unused_subtree(child);
if (dev_has_sync_state(core->dev) &&
!(core->flags & CLK_DONT_HOLD_STATE))
return;
/*
* setting CLK_ENABLE_HAND_OFF flag triggers this conditional
*
@ -1326,6 +1332,10 @@ static void clk_disable_unused_subtree(struct clk_core *core)
hlist_for_each_entry(child, &core->children, child_node)
clk_disable_unused_subtree(child);
if (dev_has_sync_state(core->dev) &&
!(core->flags & CLK_DONT_HOLD_STATE))
return;
/*
* setting CLK_ENABLE_HAND_OFF flag triggers this conditional
*
@ -1425,6 +1435,38 @@ static int clk_disable_unused(void)
}
late_initcall_sync(clk_disable_unused);
/*
 * Walk the subtree rooted at @core, children first, and call
 * clk_core_disable_unprepare() on every clock that belongs to @dev and has
 * need_sync set. Must be called with prepare_lock held.
 */
static void clk_unprepare_disable_dev_subtree(struct clk_core *core,
					      struct device *dev)
{
	struct clk_core *child;

	lockdep_assert_held(&prepare_lock);

	/* Handle the descendants before the clock itself. */
	hlist_for_each_entry(child, &core->children, child_node)
		clk_unprepare_disable_dev_subtree(child, dev);

	if (core->dev == dev && core->need_sync)
		clk_core_disable_unprepare(core);
}
void clk_sync_state(struct device *dev)
{
struct clk_core *core;
clk_prepare_lock();
hlist_for_each_entry(core, &clk_root_list, child_node)
clk_unprepare_disable_dev_subtree(core, dev);
hlist_for_each_entry(core, &clk_orphan_list, child_node)
clk_unprepare_disable_dev_subtree(core, dev);
clk_prepare_unlock();
}
EXPORT_SYMBOL_GPL(clk_sync_state);
static int clk_core_determine_round_nolock(struct clk_core *core,
struct clk_rate_request *req)
{
@ -1774,6 +1816,33 @@ static int clk_fetch_parent_index(struct clk_core *core,
return -EINVAL;
}
/*
 * Take a prepare+enable reference on @core so that it stays on, and record
 * that in core->need_sync, provided that all of the following hold:
 *  - no such reference has been recorded yet (need_sync is clear),
 *  - the clock was found enabled when it was initialised (boot_enabled),
 *  - the clock is not an orphan,
 *  - dev_has_sync_state() is true for the clock's device,
 *  - CLK_DONT_HOLD_STATE is not set in the clock flags.
 *
 * need_sync is set only if clk_core_prepare_enable() succeeds.
 */
static void clk_core_hold_state(struct clk_core *core)
{
	int ret;

	if (core->need_sync || !core->boot_enabled ||
	    core->orphan || !dev_has_sync_state(core->dev) ||
	    (core->flags & CLK_DONT_HOLD_STATE))
		return;

	ret = clk_core_prepare_enable(core);
	core->need_sync = (ret == 0);
}
/*
 * Apply clk_core_hold_state() to @core and, recursively, to all of its
 * descendants. A clock that is still an orphan is skipped together with its
 * whole subtree.
 */
static void __clk_core_update_orphan_hold_state(struct clk_core *core)
{
	struct clk_core *child;

	if (!core->orphan) {
		clk_core_hold_state(core);

		hlist_for_each_entry(child, &core->children, child_node)
			__clk_core_update_orphan_hold_state(child);
	}
}
/*
* Update the orphan status of @core and all its children.
*/
@ -4100,6 +4169,8 @@ static int __clk_core_init(struct clk_core *core)
rate = 0;
core->rate = core->req_rate = rate;
core->boot_enabled = clk_core_is_enabled(core);
/*
* Enable CLK_IS_CRITICAL clocks so newly added critical clocks
* don't get accidentally disabled when walking the orphan tree and
@ -4115,6 +4186,8 @@ static int __clk_core_init(struct clk_core *core)
clk_enable_unlock(flags);
}
clk_core_hold_state(core);
/*
* walk the list of orphan clocks and reparent any that newly finds a
* parent.
@ -4134,6 +4207,7 @@ static int __clk_core_init(struct clk_core *core)
__clk_set_parent_after(orphan, parent, NULL);
__clk_recalc_accuracies(orphan);
__clk_recalc_rates(orphan, 0);
__clk_core_update_orphan_hold_state(orphan);
}
}

View file

@ -245,6 +245,15 @@ config CPUFREQ_DT_PLATDEV
If in doubt, say N.
config CPUFREQ_DUMMY
tristate "Dummy CPU frequency driver"
help
This option adds a generic dummy CPUfreq driver, which sets a fake
2-frequency table when initializing each policy and otherwise does
nothing.
If in doubt, say N
if X86
source "drivers/cpufreq/Kconfig.x86"
endif

View file

@ -21,6 +21,8 @@ obj-$(CONFIG_CPU_BOOST) += cpu-boost.o
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
obj-$(CONFIG_CPUFREQ_DUMMY) += dummy-cpufreq.o
##################################################################################
# x86 drivers.
# Link order matters. K8 is preferred to ACPI because of firmware bugs in early

View file

@ -523,13 +523,14 @@ void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end)
struct uid_entry *uid_entry;
struct hlist_node *tmp;
unsigned long flags;
u64 uid;
spin_lock_irqsave(&uid_lock, flags);
for (; uid_start <= uid_end; uid_start++) {
for (uid = uid_start; uid <= uid_end; uid++) {
hash_for_each_possible_safe(uid_hash_table, uid_entry, tmp,
hash, uid_start) {
if (uid_start == uid_entry->uid) {
hash, uid) {
if (uid == uid_entry->uid) {
hash_del_rcu(&uid_entry->hash);
call_rcu(&uid_entry->rcu, uid_entry_reclaim);
}

View file

@ -0,0 +1,60 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2019 Google, Inc.
*/
#include <linux/cpufreq.h>
#include <linux/module.h>
/*
 * Fake frequency table with two entries (1 and 2), terminated by
 * CPUFREQ_TABLE_END. It is shared by every policy this driver initialises.
 */
static struct cpufreq_frequency_table freq_table[] = {
{ .frequency = 1 },
{ .frequency = 2 },
{ .frequency = CPUFREQ_TABLE_END },
};
/*
 * ->target_index callback: does nothing and always reports success; both
 * @policy and @index are ignored.
 */
static int dummy_cpufreq_target_index(struct cpufreq_policy *policy,
unsigned int index)
{
return 0;
}
/*
 * ->init callback: point the policy at the shared fake frequency table.
 * Always returns 0.
 */
static int dummy_cpufreq_driver_init(struct cpufreq_policy *policy)
{
policy->freq_table = freq_table;
return 0;
}
/*
 * ->get callback: always reports 1, the first entry of freq_table,
 * regardless of @cpu.
 */
static unsigned int dummy_cpufreq_get(unsigned int cpu)
{
return 1;
}
/*
 * ->verify callback: accepts any policy unchanged and always returns 0.
 */
static int dummy_cpufreq_verify(struct cpufreq_policy *policy)
{
return 0;
}
/*
 * The "dummy" cpufreq driver: wires up the no-op callbacks above and uses
 * the generic cpufreq attribute set.
 */
static struct cpufreq_driver dummy_cpufreq_driver = {
.name = "dummy",
.target_index = dummy_cpufreq_target_index,
.init = dummy_cpufreq_driver_init,
.get = dummy_cpufreq_get,
.verify = dummy_cpufreq_verify,
.attr = cpufreq_generic_attr,
};
/*
 * Module init: register the dummy driver with the cpufreq core and return
 * the result of that registration.
 */
static int __init dummy_cpufreq_init(void)
{
return cpufreq_register_driver(&dummy_cpufreq_driver);
}
/*
 * Module exit: unregister the dummy driver; the return value of the
 * unregistration is not checked.
 */
static void __exit dummy_cpufreq_exit(void)
{
cpufreq_unregister_driver(&dummy_cpufreq_driver);
}
module_init(dummy_cpufreq_init);
module_exit(dummy_cpufreq_exit);
MODULE_AUTHOR("Connor O'Brien <connoro@google.com>");
MODULE_DESCRIPTION("dummy cpufreq driver");
MODULE_LICENSE("GPL");

View file

@ -40,4 +40,19 @@ config GNSS_UBX_SERIAL
If unsure, say N.
config GNSS_CMDLINE_SERIAL
tristate "Command line test driver for GNSS"
depends on SERIAL_DEV_BUS
select GNSS_SERIAL
---help---
Say Y here if you want to test the GNSS subsystem but do not have a
way to communicate a binding through firmware such as DT or ACPI.
The correct serdev device and protocol type must be specified on
the module command line.
To compile this driver as a module, choose M here: the module will
be called gnss-cmdline.
If unsure, say N.
endif # GNSS

View file

@ -14,3 +14,6 @@ gnss-sirf-y := sirf.o
obj-$(CONFIG_GNSS_UBX_SERIAL) += gnss-ubx.o
gnss-ubx-y := ubx.o
obj-$(CONFIG_GNSS_CMDLINE_SERIAL) += gnss-cmdline.o
gnss-cmdline-y := cmdline.o

139
drivers/gnss/cmdline.c Normal file
View file

@ -0,0 +1,139 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Test driver for GNSS. This driver requires the serdev binding and protocol
* type to be specified on the module command line.
*
* Copyright 2019 Google LLC
*/
#include <linux/device.h>
#include <linux/gnss.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/serdev.h>
#include <linux/slab.h>
#include <linux/string.h>
#include "serial.h"
#define GNSS_CMDLINE_MODULE_NAME "gnss-cmdline"
#define gnss_cmdline_err(...) \
pr_err(GNSS_CMDLINE_MODULE_NAME ": " __VA_ARGS__)
/*
 * Path of the serdev device to wrap. gnss_cmdline_init() splits it on '/'
 * into three components: platform device, port and serdev device name
 * (the in-code examples are "serial8250", "serial0" and "serial0-0").
 * When unset, the module loads without doing anything.
 */
static char *serdev;
module_param(serdev, charp, 0644);
MODULE_PARM_DESC(serdev, "serial device to wrap");
/*
 * GNSS protocol type reported for the wrapped device. gnss_cmdline_init()
 * rejects values outside [0, GNSS_TYPE_COUNT).
 */
static int type;
module_param(type, int, 0644);
/* The description belongs to "type", not to "serdev". */
MODULE_PARM_DESC(type, "GNSS protocol type (see 'enum gnss_type')");
static struct serdev_device *serdev_device;
/*
 * Match callback for device_find_child(): @data is a NUL-terminated string.
 * Returns 1 if that string occurs anywhere in the name of @dev, 0 otherwise.
 */
static int name_match(struct device *dev, void *data)
{
	const char *needle = data;

	return strstr(dev_name(dev), needle) ? 1 : 0;
}
/*
 * Module init: wrap the serdev device named by the "serdev" module
 * parameter in a GNSS serial device of protocol type "type".
 *
 * The parameter has the form "<driver>/<port>/<serdev>". The driver
 * component is looked up by name on the platform bus; the port and serdev
 * components are matched as substrings of the names of the respective child
 * devices.
 *
 * The lookup helpers return counted device references. The references to
 * the platform device and to the port device are dropped as soon as the next
 * level has been looked up. The reference to the serdev device is dropped on
 * every error path and kept on success, for as long as serdev_device points
 * at it.
 *
 * Returns 0 on success or when the "serdev" parameter is not set, -EINVAL
 * for an invalid type, -ENOMEM if the parameter cannot be duplicated,
 * -ENODEV if a path component is missing or not found, or the error
 * returned by gnss_serial_allocate() / gnss_serial_register().
 */
static int __init gnss_cmdline_init(void)
{
	struct device *serial_dev, *port_dev, *serdev_dev;
	char *driver_name, *port_name, *serdev_name;
	char *serdev_dup, *serdev_dup_sep;
	struct gnss_serial *gserial;
	int err = -ENODEV;

	/* User did not set the serdev module parameter */
	if (!serdev)
		return 0;

	if (type < 0 || type >= GNSS_TYPE_COUNT) {
		gnss_cmdline_err("invalid gnss type '%d'\n", type);
		return -EINVAL;
	}

	/* strsep() modifies its input, so work on a private copy. */
	serdev_dup = serdev_dup_sep = kstrdup(serdev, GFP_KERNEL);
	if (!serdev_dup)
		return -ENOMEM;

	driver_name = strsep(&serdev_dup_sep, "/");
	if (!driver_name) {
		gnss_cmdline_err("driver name missing\n");
		goto err_free_serdev_dup;
	}

	port_name = strsep(&serdev_dup_sep, "/");
	if (!port_name) {
		gnss_cmdline_err("port name missing\n");
		goto err_free_serdev_dup;
	}

	serdev_name = strsep(&serdev_dup_sep, "/");
	if (!serdev_name) {
		gnss_cmdline_err("serdev name missing\n");
		goto err_free_serdev_dup;
	}

	/* Find the driver device instance (e.g. serial8250) */
	serial_dev = bus_find_device_by_name(&platform_bus_type,
					     NULL, driver_name);
	if (!serial_dev) {
		gnss_cmdline_err("no device '%s'\n", driver_name);
		goto err_free_serdev_dup;
	}

	/* Find the port device instance (e.g. serial0) */
	port_dev = device_find_child(serial_dev, port_name, name_match);
	/* Only needed for the child lookup; drop the lookup reference. */
	put_device(serial_dev);
	if (!port_dev) {
		gnss_cmdline_err("no port '%s'\n", port_name);
		goto err_free_serdev_dup;
	}

	/* Find the serdev device instance (e.g. serial0-0) */
	serdev_dev = device_find_child(port_dev, serdev_name, name_match);
	/* Likewise, the port was only needed to find its child. */
	put_device(port_dev);
	if (!serdev_dev) {
		gnss_cmdline_err("no serdev '%s'\n", serdev_name);
		goto err_free_serdev_dup;
	}

	gserial = gnss_serial_allocate(to_serdev_device(serdev_dev), 0);
	if (IS_ERR(gserial)) {
		err = PTR_ERR(gserial);
		goto err_put_serdev_dev;
	}

	gserial->gdev->type = type;

	err = gnss_serial_register(gserial);
	if (err) {
		gnss_serial_free(gserial);
		goto err_put_serdev_dev;
	}

	/* Success: keep the serdev reference while serdev_device is set. */
	serdev_device = to_serdev_device(serdev_dev);
	kfree(serdev_dup);
	return 0;

err_put_serdev_dev:
	put_device(serdev_dev);
err_free_serdev_dup:
	kfree(serdev_dup);
	return err;
}
/*
 * Module exit: undo gnss_cmdline_init().
 *
 * Does nothing if no device was wrapped. Otherwise the GNSS serial device
 * stored as driver data of the serdev device is deregistered and freed, and
 * the device reference obtained through device_find_child() in
 * gnss_cmdline_init() is released.
 */
static void __exit gnss_cmdline_exit(void)
{
	struct gnss_serial *gserial;

	if (!serdev_device)
		return;

	/* presumably set by gnss_serial_allocate() - verify in serial.c */
	gserial = serdev_device_get_drvdata(serdev_device);
	gnss_serial_deregister(gserial);
	gnss_serial_free(gserial);

	/* Drop the reference taken when the serdev device was looked up. */
	put_device(&serdev_device->dev);
	serdev_device = NULL;
}
module_init(gnss_cmdline_init);
module_exit(gnss_cmdline_exit);
MODULE_AUTHOR("Alistair Delva <adelva@google.com>");
MODULE_DESCRIPTION("GNSS command line driver");
MODULE_LICENSE("GPL v2");

View file

@ -1158,7 +1158,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = 0;
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
user_addr = untagged_addr(*offset);
} else {
return -EINVAL;
}

View file

@ -299,6 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
uint32_t handle;
int r;
args->addr = untagged_addr(args->addr);
if (offset_in_page(args->addr | args->size))
return -EINVAL;

View file

@ -60,7 +60,6 @@ static void drm_client_close(struct drm_client_dev *client)
drm_file_free(client->file);
}
EXPORT_SYMBOL(drm_client_close);
/**
* drm_client_init - Initialise a DRM client

Some files were not shown because too many files have changed in this diff Show more