Merge android-4.19-q.84 (314ab78) into msm-4.19

* refs/heads/tmp-314ab78:
  Linux 4.19.84
  kvm: x86: mmu: Recovery of shattered NX large pages
  kvm: Add helper function for creating VM worker threads
  kvm: mmu: ITLB_MULTIHIT mitigation
  KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active
  KVM: x86: add tracepoints around __direct_map and FNAME(fetch)
  KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON
  KVM: x86: remove now unneeded hugepage gfn adjustment
  KVM: x86: make FNAME(fetch) and __direct_map more similar
  kvm: mmu: Do not release the page inside mmu_set_spte()
  kvm: Convert kvm_lock to a mutex
  kvm: x86, powerpc: do not allow clearing largepages debugfs entry
  Documentation: Add ITLB_MULTIHIT documentation
  cpu/speculation: Uninline and export CPU mitigations helpers
  x86/cpu: Add Tremont to the cpu vulnerability whitelist
  x86/bugs: Add ITLB_MULTIHIT bug infrastructure
  x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs
  x86/tsx: Add config options to set tsx=on|off|auto
  x86/speculation/taa: Add documentation for TSX Async Abort
  x86/tsx: Add "auto" option to the tsx= cmdline parameter
  kvm/x86: Export MDS_NO=0 to guests when TSX is enabled
  x86/speculation/taa: Add sysfs reporting for TSX Async Abort
  x86/speculation/taa: Add mitigation for TSX Async Abort
  x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default
  x86/cpu: Add a helper function x86_read_arch_cap_msr()
  x86/msr: Add the IA32_TSX_CTRL MSR
  KVM: x86: use Intel speculation bugs and features as derived in generic x86 code
  drm/i915/cmdparser: Fix jump whitelist clearing
  drm/i915/gen8+: Add RC6 CTX corruption WA
  drm/i915: Lower RM timeout to avoid DSI hard hangs
  drm/i915/cmdparser: Ignore Length operands during command matching
  drm/i915/cmdparser: Add support for backward jumps
  drm/i915/cmdparser: Use explicit goto for error paths
  drm/i915: Add gen9 BCS cmdparsing
  drm/i915: Allow parsing of unsized batches
  drm/i915: Support ro ppgtt mapped cmdparser shadow buffers
  drm/i915: Add support for mandatory cmdparsing
  drm/i915: Remove Master tables from cmdparser
  drm/i915: Disable Secure Batches for gen6+
  drm/i915: Rename gen7 cmdparser tables
  vsock/virtio: fix sock refcnt holding during the shutdown
  iio: imu: mpu6050: Fix FIFO layout for ICM20602
  net: prevent load/store tearing on sk->sk_stamp
  netfilter: ipset: Copy the right MAC address in hash:ip,mac IPv6 sets
  usbip: Fix free of unallocated memory in vhci tx
  cgroup,writeback: don't switch wbs immediately on dead wbs if the memcg is dead
  mm/filemap.c: don't initiate writeback if mapping has no dirty pages
  iio: imu: inv_mpu6050: fix no data on MPU6050
  iio: imu: mpu6050: Add support for the ICM 20602 IMU
  blkcg: make blkcg_print_stat() print stats only for online blkgs
  pinctrl: cherryview: Fix irq_valid_mask calculation
  ocfs2: protect extent tree in ocfs2_prepare_inode_for_write()
  pinctrl: intel: Avoid potential glitches if pin is in GPIO mode
  e1000: fix memory leaks
  igb: Fix constant media auto sense switching when no cable is connected
  net: ethernet: arc: add the missed clk_disable_unprepare
  NFSv4: Don't allow a cached open with a revoked delegation
  usb: dwc3: gadget: fix race when disabling ep with cancelled xfers
  hv_netvsc: Fix error handling in netvsc_attach()
  drm/amd/display: Passive DP->HDMI dongle detection fix
  drm/amdgpu: If amdgpu_ib_schedule fails return back the error.
  iommu/amd: Apply the same IVRS IOAPIC workaround to Acer Aspire A315-41
  net: mscc: ocelot: refuse to overwrite the port's native vlan
  net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up
  net: hisilicon: Fix "Trying to free already-free IRQ"
  fjes: Handle workqueue allocation failure
  nvme-multipath: fix possible io hang after ctrl reconnect
  scsi: qla2xxx: stop timer in shutdown path
  RDMA/hns: Prevent memory leaks of eq->buf_list
  RDMA/iw_cxgb4: Avoid freeing skb twice in arp failure case
  usbip: tools: Fix read_usb_vudc_device() error path handling
  USB: ldusb: use unsigned size format specifiers
  USB: Skip endpoints with 0 maxpacket length
  perf/x86/uncore: Fix event group support
  perf/x86/amd/ibs: Handle erratum #420 only on the affected CPU family (10h)
  perf/x86/amd/ibs: Fix reading of the IBS OpData register and thus precise RIP validity
  usb: dwc3: remove the call trace of USBx_GFLADJ
  usb: gadget: configfs: fix concurrent issue between composite APIs
  usb: dwc3: pci: prevent memory leak in dwc3_pci_probe
  usb: gadget: composite: Fix possible double free memory bug
  usb: gadget: udc: atmel: Fix interrupt storm in FIFO mode.
  usb: fsl: Check memory resource before releasing it
  macsec: fix refcnt leak in module exit routine
  bonding: fix unexpected IFF_BONDING bit unset
  ipvs: move old_secure_tcp into struct netns_ipvs
  ipvs: don't ignore errors in case refcounting ip_vs module fails
  netfilter: nf_flow_table: set timeout before insertion into hashes
  scsi: qla2xxx: Initialized mailbox to prevent driver load failure
  scsi: lpfc: Honor module parameter lpfc_use_adisc
  net: openvswitch: free vport unless register_netdevice() succeeds
  RDMA/uverbs: Prevent potential underflow
  scsi: qla2xxx: fixup incorrect usage of host_byte
  net/mlx5: prevent memory leak in mlx5_fpga_conn_create_cq
  net/mlx5e: TX, Fix consumer index of error cqe dump
  RDMA/qedr: Fix reported firmware version
  iw_cxgb4: fix ECN check on the passive accept
  RDMA/mlx5: Clear old rate limit when closing QP
  HID: intel-ish-hid: fix wrong error handling in ishtp_cl_alloc_tx_ring()
  dmaengine: sprd: Fix the possible memory leak issue
  dmaengine: xilinx_dma: Fix control reg update in vdma_channel_set_config
  HID: google: add magnemite/masterball USB ids
  PCI: tegra: Enable Relaxed Ordering only for Tegra20 & Tegra30
  usbip: Implement SG support to vhci-hcd and stub driver
  usbip: Fix vhci_urb_enqueue() URB null transfer buffer error path
  sched/fair: Fix -Wunused-but-set-variable warnings
  sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices
  ALSA: usb-audio: Fix copy&paste error in the validator
  ALSA: usb-audio: remove some dead code
  ALSA: usb-audio: Fix possible NULL dereference at create_yamaha_midi_quirk()
  ALSA: usb-audio: Clean up check_input_term()
  ALSA: usb-audio: Remove superfluous bLength checks
  ALSA: usb-audio: Unify the release of usb_mixer_elem_info objects
  ALSA: usb-audio: Simplify parse_audio_unit()
  ALSA: usb-audio: More validations of descriptor units
  configfs: fix a deadlock in configfs_symlink()
  configfs: provide exclusion between IO and removals
  configfs: new object reprsenting tree fragments
  configfs_register_group() shouldn't be (and isn't) called in rmdirable parts
  configfs: stash the data we need into configfs_buffer at open time
  can: peak_usb: fix slab info leak
  can: mcba_usb: fix use-after-free on disconnect
  can: dev: add missing of_node_put() after calling of_get_child_by_name()
  can: gs_usb: gs_can_open(): prevent memory leak
  can: rx-offload: can_rx_offload_queue_sorted(): fix error handling, avoid skb mem leak
  can: peak_usb: fix a potential out-of-sync while decoding packets
  can: c_can: c_can_poll(): only read status register after status IRQ
  can: flexcan: disable completely the ECC mechanism
  can: usb_8dev: fix use-after-free on disconnect
  SMB3: Fix persistent handles reconnect
  x86/apic/32: Avoid bogus LDR warnings
  intel_th: pci: Add Jasper Lake PCH support
  intel_th: pci: Add Comet Lake PCH support
  netfilter: ipset: Fix an error code in ip_set_sockfn_get()
  netfilter: nf_tables: Align nft_expr private data to 64-bit
  ARM: sunxi: Fix CPU powerdown on A83T
  iio: srf04: fix wrong limitation in distance measuring
  iio: imu: adis16480: make sure provided frequency is positive
  iio: adc: stm32-adc: fix stopping dma
  ceph: add missing check in d_revalidate snapdir handling
  ceph: fix use-after-free in __ceph_remove_cap()
  arm64: Do not mask out PTE_RDONLY in pte_same()
  soundwire: bus: set initial value to port_status
  soundwire: depend on ACPI
  HID: wacom: generic: Treat serial number and related fields as unsigned
  drm/radeon: fix si_enable_smc_cac() failed issue
  perf tools: Fix time sorting
  tools: gpio: Use !building_out_of_srctree to determine srctree
  dump_stack: avoid the livelock of the dump_lock
  mm, vmstat: hide /proc/pagetypeinfo from normal users
  mm: thp: handle page cache THP correctly in PageTransCompoundMap
  mm, meminit: recalculate pcpu batch and high limits after init completes
  mm: memcontrol: fix network errors from failing __GFP_ATOMIC charges
  ALSA: hda/ca0132 - Fix possible workqueue stall
  ALSA: bebob: fix to detect configured source of sampling clock for Focusrite Saffire Pro i/o series
  ALSA: timer: Fix incorrectly assigned timer instance
  net: hns: Fix the stray netpoll locks causing deadlock in NAPI path
  ipv6: fixes rt6_probe() and fib6_nh->last_probe init
  net: mscc: ocelot: fix NULL pointer on LAG slave removal
  net: mscc: ocelot: don't handle netdev events for other netdevs
  qede: fix NULL pointer deref in __qede_remove()
  NFC: st21nfca: fix double free
  nfc: netlink: fix double device reference drop
  NFC: fdp: fix incorrect free object
  net: usb: qmi_wwan: add support for DW5821e with eSIM support
  net: qualcomm: rmnet: Fix potential UAF when unregistering
  net: fix data-race in neigh_event_send()
  net: ethernet: octeon_mgmt: Account for second possible VLAN header
  ipv4: Fix table id reference in fib_sync_down_addr
  CDC-NCM: handle incomplete transfer of MTU
  bonding: fix state transition issue in link monitoring
  Linux 4.19.83
  usb: gadget: udc: core: Fix segfault if udc_bind_to_driver() for pending driver fails
  arm64: dts: ti: k3-am65-main: Fix gic-its node unit-address
  ASoC: pcm3168a: The codec does not support S32_LE
  selftests/powerpc: Fix compile error on tlbie_test due to newer gcc
  selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue
  powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
  platform/x86: pmc_atom: Add Siemens SIMATIC IPC227E to critclk_systems DMI table
  wireless: Skip directory when generating certificates
  net/flow_dissector: switch to siphash
  r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2
  net: dsa: fix switch tree list
  net: usb: lan78xx: Connect PHY before registering MAC
  net: bcmgenet: reset 40nm EPHY on energy detect
  net: phy: bcm7xxx: define soft_reset for 40nm EPHY
  net: bcmgenet: don't set phydev->link from MAC
  net: dsa: b53: Do not clear existing mirrored port mask
  net/mlx5e: Fix ethtool self test: link speed
  r8169: fix wrong PHY ID issue with RTL8168dp
  net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget
  selftests: fib_tests: add more tests for metric update
  ipv4: fix route update on metric change.
  net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
  net: use skb_queue_empty_lockless() in busy poll contexts
  net: use skb_queue_empty_lockless() in poll() handlers
  udp: use skb_queue_empty_lockless()
  net: add skb_queue_empty_lockless()
  vxlan: check tun_info options_len properly
  udp: fix data-race in udp_set_dev_scratch()
  selftests: net: reuseport_dualstack: fix uninitalized parameter
  net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
  net: usb: lan78xx: Disable interrupts before calling generic_handle_irq()
  netns: fix GFP flags in rtnl_net_notifyid()
  net/mlx4_core: Dynamically set guaranteed amount of counters per VF
  net: hisilicon: Fix ping latency when deal with high throughput
  net: fix sk_page_frag() recursion from memory reclaim
  net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum
  net: dsa: bcm_sf2: Fix IMP setup for port different than 8
  net: annotate lockless accesses to sk->sk_napi_id
  net: annotate accesses to sk->sk_incoming_cpu
  inet: stop leaking jiffies on the wire
  erspan: fix the tun_info options_len check for erspan
  dccp: do not leak jiffies on the wire
  cxgb4: fix panic when attaching to ULD fail
  nbd: handle racing with error'ed out commands
  nbd: protect cmd->status with cmd->lock
  cifs: Fix cifsInodeInfo lock_sem deadlock when reconnect occurs
  i2c: stm32f7: remove warning when compiling with W=1
  i2c: stm32f7: fix a race in slave mode with arbitration loss irq
  i2c: stm32f7: fix first byte to send in slave mode
  irqchip/gic-v3-its: Use the exact ITSList for VMOVP
  MIPS: bmips: mark exception vectors as char arrays
  of: unittest: fix memory leak in unittest_data_add
  ARM: 8926/1: v7m: remove register save to stack before svc
  tracing: Fix "gfp_t" format for synthetic events
  scsi: target: core: Do not overwrite CDB byte 1
  drm/amdgpu: fix potential VM faults
  ARM: davinci: dm365: Fix McBSP dma_slave_map entry
  perf kmem: Fix memory leak in compact_gfp_flags()
  8250-men-mcb: fix error checking when get_num_ports returns -ENODEV
  perf c2c: Fix memory leak in build_cl_output()
  ARM: dts: imx7s: Correct GPT's ipg clock source
  scsi: fix kconfig dependency warning related to 53C700_LE_ON_BE
  scsi: sni_53c710: fix compilation error
  scsi: scsi_dh_alua: handle RTPG sense code correctly during state transitions
  scsi: qla2xxx: fix a potential NULL pointer dereference
  ARM: mm: fix alignment handler faults under memory pressure
  pinctrl: ns2: Fix off by one bugs in ns2_pinmux_enable()
  ARM: dts: logicpd-torpedo-som: Remove twl_keypad
  ASoc: rockchip: i2s: Fix RPM imbalance
  ASoC: wm_adsp: Don't generate kcontrols without READ flags
  regulator: pfuze100-regulator: Variable "val" in pfuze100_regulator_probe() could be uninitialized
  ASoC: rt5682: add NULL handler to set_jack function
  regulator: ti-abb: Fix timeout in ti_abb_wait_txdone/ti_abb_clear_all_txdone
  arm64: dts: Fix gpio to pinmux mapping
  arm64: dts: allwinner: a64: sopine-baseboard: Add PHY regulator delay
  arm64: dts: allwinner: a64: pine64-plus: Add PHY regulator delay
  ASoC: wm8994: Do not register inapplicable controls for WM1811
  regulator: of: fix suspend-min/max-voltage parsing
  kbuild: add -fcf-protection=none when using retpoline flags
  Linux 4.19.82
  Revert "ALSA: hda: Flush interrupts on disabling"
  powerpc/powernv: Fix CPU idle to be called with IRQs disabled
  ALSA: usb-audio: Add DSD support for Gustard U16/X26 USB Interface
  ALSA: usb-audio: Update DSD support quirks for Oppo and Rotel
  ALSA: usb-audio: DSD auto-detection for Playback Designs
  ALSA: timer: Fix mutex deadlock at releasing card
  ALSA: timer: Simplify error path in snd_timer_open()
  sch_netem: fix rcu splat in netem_enqueue()
  net: usb: sr9800: fix uninitialized local variable
  bonding: fix potential NULL deref in bond_update_slave_arr
  NFC: pn533: fix use-after-free and memleaks
  rxrpc: Fix trace-after-put looking at the put peer record
  rxrpc: rxrpc_peer needs to hold a ref on the rxrpc_local record
  rxrpc: Fix call ref leak
  llc: fix sk_buff leak in llc_conn_service()
  llc: fix sk_buff leak in llc_sap_state_process()
  batman-adv: Avoid free/alloc race when handling OGM buffer
  NFS: Fix an RCU lock leak in nfs4_refresh_delegation_stateid()
  drm/amdgpu/powerplay/vega10: allow undervolting in p7
  dmaengine: cppi41: Fix cppi41_dma_prep_slave_sg() when idle
  dmaengine: qcom: bam_dma: Fix resource leak
  rtlwifi: Fix potential overflow on P2P code
  arm64: Ensure VM_WRITE|VM_SHARED ptes are clean by default
  s390/idle: fix cpu idle time calculation
  s390/cmm: fix information leak in cmm_timeout_handler()
  nl80211: fix validation of mesh path nexthop
  HID: fix error message in hid_open_report()
  HID: Fix assumption that devices have inputs
  HID: i2c-hid: add Trekstor Primebook C11B to descriptor override
  scsi: target: cxgbit: Fix cxgbit_fw4_ack()
  USB: serial: whiteheat: fix line-speed endianness
  USB: serial: whiteheat: fix potential slab corruption
  usb: xhci: fix __le32/__le64 accessors in debugfs code
  USB: ldusb: fix control-message timeout
  USB: ldusb: fix ring-buffer locking
  usb-storage: Revert commit 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows")
  USB: gadget: Reject endpoints with 0 maxpacket value
  UAS: Revert commit 3ae62a42090f ("UAS: fix alignment of scatter/gather segments")
  ALSA: hda/realtek - Add support for ALC623
  ALSA: hda/realtek - Fix 2 front mics of codec 0x623
  ALSA: bebob: Fix prototype of helper function to return negative value
  fuse: truncate pending writes on O_TRUNC
  fuse: flush dirty data/metadata before non-truncate setattr
  ath6kl: fix a NULL-ptr-deref bug in ath6kl_usb_alloc_urb_from_pipe()
  thunderbolt: Use 32-bit writes when writing ring producer/consumer
  USB: legousbtower: fix a signedness bug in tower_probe()
  nbd: verify socket is supported during setup
  iwlwifi: exclude GEO SAR support for 3168
  ALSA: hda/realtek: Reduce the Headphone static noise on XPS 9350/9360
  ARM: 8914/1: NOMMU: Fix exc_ret for XIP
  tracing: Initialize iter->seq after zeroing in tracing_read_pipe()
  s390/uaccess: avoid (false positive) compiler warnings
  NFSv4: Fix leak of clp->cl_acceptor string
  nbd: fix possible sysfs duplicate warning
  virt: vbox: fix memory leak in hgcm_call_preprocess_linaddr
  MIPS: fw: sni: Fix out of bounds init of o32 stack
  MIPS: include: Mark __xchg as __always_inline
  iio: imu: adis16400: release allocated memory on failure
  drm/amdgpu: fix memory leak
  perf/x86/amd: Change/fix NMI latency mitigation to use a timestamp
  sched/vtime: Fix guest/system mis-accounting on task switch
  x86/cpu: Add Comet Lake to the Intel CPU models header
  arm64: armv8_deprecated: Checking return value for memory allocation
  fs: ocfs2: fix a possible null-pointer dereference in ocfs2_info_scan_inode_alloc()
  fs: ocfs2: fix a possible null-pointer dereference in ocfs2_write_end_nolock()
  fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()
  ocfs2: clear zero in unaligned direct IO
  x86/xen: Return from panic notifier
  MIPS: include: Mark __cmpxchg as __always_inline
  efi/x86: Do not clean dummy variable in kexec path
  efi/cper: Fix endianness of PCIe class code
  serial: mctrl_gpio: Check for NULL pointer
  fs: cifs: mute -Wunused-const-variable message
  gpio: max77620: Use correct unit for debounce times
  tty: n_hdlc: fix build on SPARC
  tty: serial: owl: Fix the link time qualifier of 'owl_uart_exit()'
  arm64: ftrace: Ensure synchronisation in PLT setup for Neoverse-N1 #1542419
  nfs: Fix nfsi->nrequests count error on nfs_inode_remove_request
  HID: hyperv: Use in-place iterator API in the channel callback
  RDMA/iwcm: Fix a lock inversion issue
  RDMA/hfi1: Prevent memory leak in sdma_init
  staging: rtl8188eu: fix null dereference when kzalloc fails
  perf annotate: Return appropriate error code for allocation failures
  perf annotate: Propagate the symbol__annotate() error return
  perf annotate: Fix the signedness of failure returns
  perf annotate: Propagate perf_env__arch() error
  perf tools: Propagate get_cpuid() error
  perf jevents: Fix period for Intel fixed counters
  perf script brstackinsn: Fix recovery from LBR/binary mismatch
  perf map: Fix overlapped map handling
  perf tests: Avoid raising SEGV using an obvious NULL dereference
  libsubcmd: Make _FORTIFY_SOURCE defines dependent on the feature
  iio: fix center temperature of bmc150-accel-core
  iio: adc: meson_saradc: Fix memory allocation order
  power: supply: max14656: fix potential use-after-free
  drm/amd/display: fix odm combine pipe reset
  PCI/PME: Fix possible use-after-free on remove
  net: dsa: mv88e6xxx: Release lock while requesting IRQ
  exec: load_script: Do not exec truncated interpreter path
  ext4: disallow files with EXT4_JOURNAL_DATA_FL from EXT4_IOC_SWAP_BOOT
  media: vimc: Remove unused but set variables
  ALSA: hda/realtek - Apply ALC294 hp init also for S4 resume
  cifs: add credits from unmatched responses/messages
  CIFS: Respect SMB2 hdr preamble size in read responses
  scsi: lpfc: Correct localport timeout duration error
  mlxsw: spectrum: Set LAG port collector only when active
  arm64: kpti: Whitelist HiSilicon Taishan v110 CPUs
  arm64: Add MIDR encoding for HiSilicon Taishan CPUs
  rtc: pcf8523: set xtal load capacitance from DT
  usb: handle warm-reset port requests on hub resume
  ALSA: usb-audio: Cleanup DSD whitelist
  usb: dwc3: gadget: clear DWC3_EP_TRANSFER_STARTED on cmd complete
  usb: dwc3: gadget: early giveback if End Transfer already completed
  samples: bpf: fix: seg fault with NULL pointer arg
  HID: steam: fix deadlock with input devices.
  HID: steam: fix boot loop with bluetooth firmware
  NFSv4: Ensure that the state manager exits the loop on SIGKILL
  HID: Add ASUS T100CHI keyboard dock battery quirks
  staging: mt7621-pinctrl: use pinconf-generic for 'dt_node_to_map' and 'dt_free_map'
  scripts/setlocalversion: Improve -dirty check with git-status --no-optional-locks
  clk: boston: unregister clks on failure in clk_boston_setup()
  ath10k: assign 'n_cipher_suites = 11' for WCN3990 to enable WPA3
  platform/x86: Fix config space access for intel_atomisp2_pm
  platform/x86: Add the VLV ISP PCI ID to atomisp2_pm
  HID: i2c-hid: Add Odys Winbook 13 to descriptor override
  HID: i2c-hid: Ignore input report if there's no data present on Elan touchpanels
  HID: i2c-hid: Disable runtime PM for LG touchscreen
  netfilter: ipset: Make invalid MAC address checks consistent
  Btrfs: fix deadlock on tree root leaf when finding free extent
  PCI: Fix Switchtec DMA aliasing quirk dmesg noise
  bcache: fix input overflow to writeback_rate_minimum
  drm/msm/dpu: handle failures while initializing displays
  x86/cpu: Add Atom Tremont (Jacobsville)
  tools/power turbostat: fix goldmont C-state limit decoding
  usb: dwc2: fix unbalanced use of external vbus-supply
  HID: i2c-hid: add Direkt-Tek DTLAPY133-1 to descriptor override
  f2fs: fix to recover inode->i_flags of inode block during POR
  f2fs: fix to recover inode's i_gc_failures during POR
  powerpc/powernv: hold device_hotplug_lock when calling memtrace_offline_pages()
  sc16is7xx: Fix for "Unexpected interrupt: 8"
  scsi: lpfc: Fix a duplicate 0711 log message number.
  f2fs: flush quota blocks after turnning it off
  wil6210: fix freeing of rx buffers in EDMA mode
  btrfs: tracepoints: Fix wrong parameter order for qgroup events
  btrfs: qgroup: Always free PREALLOC META reserve in btrfs_delalloc_release_extents()
  Btrfs: fix memory leak due to concurrent append writes with fiemap
  Btrfs: fix inode cache block reserve leak on failure to allocate data space
  dm snapshot: rework COW throttling to fix deadlock
  dm snapshot: introduce account_start_copy() and account_end_copy()
  zram: fix race between backing_dev_show and backing_dev_store

Conflicts:
	arch/arm64/include/asm/cputype.h
	drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
	drivers/net/wireless/ath/wil6210/txrx_edma.c
	drivers/usb/dwc3/gadget.c
	include/linux/cpu.h
	kernel/cpu.c

Following USB commits were reverted on importing android-4.19.57
into msm-4.19 due to BootTimeRunner failure. android-4.19-q.82
introduced new usb changes [1] that fixed the regression, hence it
is safe to restore the reverts. It is done in this merge.

  9c423fd89("usb: dwc3: Reset num_trbs after skipping")
  385cacd95("usb: dwc3: gadget: Clear req->needs_extra_trb flag on cleanup")
  6edcdd0e6("usb: dwc3: gadget: remove wait_end_transfer")
  d7ff2e3ff("usb: dwc3: gadget: move requests to cancelled_list")
  bba5f9878("usb: dwc3: gadget: introduce cancelled_list")
  65e1f3403("usb: dwc3: gadget: extract dwc3_gadget_ep_skip_trbs()")
  56092bd50("usb: dwc3: gadget: use num_trbs when skipping TRBs on->dequeue()")
  2a2b1c4dc("usb: dwc3: gadget: track number of TRBs per request")
  420b1237c("usb: dwc3: gadget: combine unaligned and zero flags")
  62805d319("Revert "usb: dwc3: gadget: Clear req->needs_extra_trb flag on cleanup"")

[1]
  a0608eec29("usb: dwc3: gadget: clear DWC3_EP_TRANSFER_STARTED on cmd complete")
  d0e8b35e91("usb: dwc3: gadget: early giveback if End Transfer already completed")

Change-Id: I77c3490d2c1cf7c8233a7e797c6f217f737621a2
Signed-off-by: Ivaylo Georgiev <irgeorgiev@codeaurora.org>
This commit is contained in:
Ivaylo Georgiev 2020-01-28 01:25:51 -08:00
commit b434e4bcd4
447 changed files with 7635 additions and 2691 deletions

View file

@ -478,6 +478,8 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities

View file

@ -12,3 +12,5 @@ are configurable at compile, boot or run time.
spectre
l1tf
mds
tsx_async_abort
multihit.rst

View file

@ -0,0 +1,163 @@
iTLB multihit
=============
iTLB multihit is an erratum where some processors may incur a machine check
error, possibly resulting in an unrecoverable CPU lockup, when an
instruction fetch hits multiple entries in the instruction TLB. This can
occur when the page size is changed along with either the physical address
or cache type. A malicious guest running on a virtualized system can
exploit this erratum to perform a denial of service attack.
Affected processors
-------------------
Variations of this erratum are present on most Intel Core and Xeon processor
models. The erratum is not present on:
- non-Intel processors
- Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
- Intel processors that have the PSCHANGE_MC_NO bit set in the
IA32_ARCH_CAPABILITIES MSR.
Related CVEs
------------
The following CVE entry is related to this issue:
============== =================================================
CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
============== =================================================
Problem
-------
Privileged software, including OS and virtual machine managers (VMM), are in
charge of memory management. A key component in memory management is the control
of the page tables. Modern processors use virtual memory, a technique that creates
the illusion of a very large memory for processors. This virtual space is split
into pages of a given size. Page tables translate virtual addresses to physical
addresses.
To reduce latency when performing a virtual to physical address translation,
processors include a structure, called TLB, that caches recent translations.
There are separate TLBs for instruction (iTLB) and data (dTLB).
Under this errata, instructions are fetched from a linear address translated
using a 4 KB translation cached in the iTLB. Privileged software modifies the
paging structure so that the same linear address using large page size (2 MB, 4
MB, 1 GB) with a different physical address or memory type. After the page
structure modification but before the software invalidates any iTLB entries for
the linear address, a code fetch that happens on the same linear address may
cause a machine-check error which can result in a system hang or shutdown.
Attack scenarios
----------------
Attacks against the iTLB multihit erratum can be mounted from malicious
guests in a virtualized system.
iTLB multihit system information
--------------------------------
The Linux kernel provides a sysfs interface to enumerate the current iTLB
multihit status of the system:whether the system is vulnerable and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
The possible values in this file are:
.. list-table::
* - Not affected
- The processor is not vulnerable.
* - KVM: Mitigation: Split huge pages
- Software changes mitigate this issue.
* - KVM: Vulnerable
- The processor is vulnerable, but no mitigation enabled
Enumeration of the erratum
--------------------------------
A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr
and will be set on CPU's which are mitigated against this issue.
======================================= =========== ===============================
IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model
IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model
IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable
======================================= =========== ===============================
Mitigation mechanism
-------------------------
This erratum can be mitigated by restricting the use of large page sizes to
non-executable pages. This forces all iTLB entries to be 4K, and removes
the possibility of multiple hits.
In order to mitigate the vulnerability, KVM initially marks all huge pages
as non-executable. If the guest attempts to execute in one of those pages,
the page is broken down into 4K pages, which are then marked executable.
If EPT is disabled or not available on the host, KVM is in control of TLB
flushes and the problematic situation cannot happen. However, the shadow
EPT paging mechanism used by nested virtualization is vulnerable, because
the nested guest can trigger multiple iTLB hits by modifying its own
(non-nested) page tables. For simplicity, KVM will make large pages
non-executable in all shadow paging modes.
Mitigation control on the kernel command line and KVM - module parameter
------------------------------------------------------------------------
The KVM hypervisor mitigation mechanism for marking huge pages as
non-executable can be controlled with a module parameter "nx_huge_pages=".
The kernel command line allows to control the iTLB multihit mitigations at
boot time with the option "kvm.nx_huge_pages=".
The valid arguments for these options are:
========== ================================================================
force Mitigation is enabled. In this case, the mitigation implements
non-executable huge pages in Linux kernel KVM module. All huge
pages in the EPT are marked as non-executable.
If a guest attempts to execute in one of those pages, the page is
broken down into 4K pages, which are then marked executable.
off Mitigation is disabled.
auto Enable mitigation only if the platform is affected and the kernel
was not booted with the "mitigations=off" command line parameter.
This is the default option.
========== ================================================================
Mitigation selection guide
--------------------------
1. No virtualization in use
^^^^^^^^^^^^^^^^^^^^^^^^^^^
The system is protected by the kernel unconditionally and no further
action is required.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from a trusted source, you may assume that the guest will
not attempt to maliciously exploit these errata and no further action is
required.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from an untrusted source, the guest host kernel will need
to apply iTLB multihit mitigation via the kernel command line or kvm
module parameter.

View file

@ -0,0 +1,276 @@
.. SPDX-License-Identifier: GPL-2.0
TAA - TSX Asynchronous Abort
======================================
TAA is a hardware vulnerability that allows unprivileged speculative access to
data which is available in various CPU internal buffers by using asynchronous
aborts within an Intel TSX transactional region.
Affected processors
-------------------
This vulnerability only affects Intel processors that support Intel
Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
also mitigate against TAA.
Whether a processor is affected or not can be read out from the TAA
vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
Related CVEs
------------
The following CVE entry is related to this TAA issue:
============== ===== ===================================================
CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some
microprocessors utilizing speculative execution may
allow an authenticated user to potentially enable
information disclosure via a side channel with
local access.
============== ===== ===================================================
Problem
-------
When performing store, load or L1 refill operations, processors write
data into temporary microarchitectural structures (buffers). The data in
those buffers can be forwarded to load operations as an optimization.
Intel TSX is an extension to the x86 instruction set architecture that adds
hardware transactional memory support to improve performance of multi-threaded
software. TSX lets the processor expose and exploit concurrency hidden in an
application due to dynamically avoiding unnecessary synchronization.
TSX supports atomic memory transactions that are either committed (success) or
aborted. During an abort, operations that happened within the transactional region
are rolled back. An asynchronous abort takes place, among other options, when a
different thread accesses a cache line that is also used within the transactional
region when that access might lead to a data race.
Immediately after an uncompleted asynchronous abort, certain speculatively
executed loads may read data from those internal buffers and pass it to dependent
operations. This can be then used to infer the value via a cache side channel
attack.
Because the buffers are potentially shared between Hyper-Threads cross
Hyper-Thread attacks are possible.
The victim of a malicious actor does not need to make use of TSX. Only the
attacker needs to begin a TSX transaction and raise an asynchronous abort
which in turn potenitally leaks data stored in the buffers.
More detailed technical information is available in the TAA specific x86
architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
Attack scenarios
----------------
Attacks against the TAA vulnerability can be implemented from unprivileged
applications running on hosts or guests.
As for MDS, the attacker has no control over the memory addresses that can
be leaked. Only the victim is responsible for bringing data to the CPU. As
a result, the malicious actor has to sample as much data as possible and
then postprocess it to try to infer any useful information from it.
A potential attacker only has read access to the data. Also, there is no direct
privilege escalation by using this technique.
.. _tsx_async_abort_sys_info:
TAA system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current TAA status
of mitigated systems. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
The possible values in this file are:
.. list-table::
* - 'Vulnerable'
- The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
* - 'Vulnerable: Clear CPU buffers attempted, no microcode'
- The system tries to clear the buffers but the microcode might not support the operation.
* - 'Mitigation: Clear CPU buffers'
- The microcode has been updated to clear the buffers. TSX is still enabled.
* - 'Mitigation: TSX disabled'
- TSX is disabled.
* - 'Not affected'
- The CPU is not affected by this issue.
.. _ucode_needed:
Best effort mitigation mode
^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the processor is vulnerable, but the availability of the microcode-based
mitigation mechanism is not advertised via CPUID the kernel selects a best
effort mitigation mode. This mode invokes the mitigation instructions
without a guarantee that they clear the CPU buffers.
This is done to address virtualization scenarios where the host has the
microcode update applied, but the hypervisor is not yet updated to expose the
CPUID to the guest. If the host has updated microcode the protection takes
effect; otherwise a few CPU cycles are wasted pointlessly.
The state in the tsx_async_abort sysfs file reflects this situation
accordingly.
Mitigation mechanism
--------------------
The kernel detects the affected CPUs and the presence of the microcode which is
required. If a CPU is affected and the microcode is available, then the kernel
enables the mitigation by default.
The mitigation can be controlled at boot time via a kernel command line option.
See :ref:`taa_mitigation_control_command_line`.
.. _virt_mechanism:
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
Affected systems where the host has TAA microcode and TAA is mitigated by
having disabled TSX previously, are not vulnerable regardless of the status
of the VMs.
In all other cases, if the host either does not have the TAA microcode or
the kernel is not mitigated, the system might be vulnerable.
.. _taa_mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows to control the TAA mitigations at boot time with
the option "tsx_async_abort=". The valid arguments for this option are:
============ =============================================================
off This option disables the TAA mitigation on affected platforms.
If the system has TSX enabled (see next parameter) and the CPU
is affected, the system is vulnerable.
full TAA mitigation is enabled. If TSX is enabled, on an affected
system it will clear CPU buffers on ring transitions. On
systems which are MDS-affected and deploy MDS mitigation,
TAA is also mitigated. Specifying this option on those
systems will have no effect.
full,nosmt The same as tsx_async_abort=full, with SMT disabled on
vulnerable CPUs that have TSX enabled. This is the complete
mitigation. When TSX is disabled, SMT is not disabled because
CPU is not vulnerable to cross-thread TAA attacks.
============ =============================================================
Not specifying this option is equivalent to "tsx_async_abort=full".
The kernel command line also allows to control the TSX feature using the
parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
to control the TSX feature and the enumeration of the TSX feature bits (RTM
and HLE) in CPUID.
The valid options are:
============ =============================================================
off Disables TSX on the system.
Note that this option takes effect only on newer CPUs which are
not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
and which get the new IA32_TSX_CTRL MSR through a microcode
update. This new MSR allows for the reliable deactivation of
the TSX functionality.
on Enables TSX.
Although there are mitigations for all known security
vulnerabilities, TSX has been known to be an accelerator for
several previous speculation-related CVEs, and so there may be
unknown security risks associated with leaving it enabled.
auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
on the system.
============ =============================================================
Not specifying this option is equivalent to "tsx=off".
The following combinations of the "tsx_async_abort" and "tsx" are possible. For
affected platforms tsx=auto is equivalent to tsx=off and the result will be:
========= ========================== =========================================
tsx=on tsx_async_abort=full The system will use VERW to clear CPU
buffers. Cross-thread attacks are still
possible on SMT machines.
tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT
mitigated.
tsx=on tsx_async_abort=off The system is vulnerable.
tsx=off tsx_async_abort=full TSX might be disabled if microcode
provides a TSX control MSR. If so,
system is not vulnerable.
tsx=off tsx_async_abort=full,nosmt Ditto
tsx=off tsx_async_abort=off ditto
========= ========================== =========================================
For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU
buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
"tsx" command line argument has no effect.
For the affected platforms below table indicates the mitigation status for the
combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
and TSX_CTRL_MSR.
======= ========= ============= ========================================
MDS_NO MD_CLEAR TSX_CTRL_MSR Status
======= ========= ============= ========================================
0 0 0 Vulnerable (needs microcode)
0 1 0 MDS and TAA mitigated via VERW
1 1 0 MDS fixed, TAA vulnerable if TSX enabled
because MD_CLEAR has no meaning and
VERW is not guaranteed to clear buffers
1 X 1 MDS fixed, TAA can be mitigated by
VERW or TSX_CTRL_MSR
======= ========= ============= ========================================
Mitigation selection guide
--------------------------
1. Trusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If all user space applications are from a trusted source and do not execute
untrusted code which is supplied externally, then the mitigation can be
disabled. The same applies to virtualized environments with trusted guests.
2. Untrusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If there are untrusted applications or guests on the system, enabling TSX
might allow a malicious actor to leak data from the host or from other
processes running on the same physical core.
If the microcode is available and the TSX is disabled on the host, attacks
are prevented in a virtualized environment as well, even if the VMs do not
explicitly enable the mitigation.
.. _taa_default_mitigations:
Default mitigations
-------------------
The kernel's default action for vulnerable processors is:
- Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).

View file

@ -1963,6 +1963,25 @@
KVM MMU at runtime.
Default is 0 (off)
kvm.nx_huge_pages=
[KVM] Controls the software workaround for the
X86_BUG_ITLB_MULTIHIT bug.
force : Always deploy workaround.
off : Never deploy workaround.
auto : Deploy workaround based on the presence of
X86_BUG_ITLB_MULTIHIT.
Default is 'auto'.
If the software workaround is enabled for the host,
guests do need not to enable it for nested guests.
kvm.nx_huge_pages_recovery_ratio=
[KVM] Controls how many 4KiB pages are periodically zapped
back to huge pages. 0 disables the recovery, otherwise if
the value is N KVM will zap 1/Nth of the 4KiB pages every
minute. The default is 60.
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled)
@ -2530,6 +2549,13 @@
ssbd=force-off [ARM64]
l1tf=off [X86]
mds=off [X86]
tsx_async_abort=off [X86]
kvm.nx_huge_pages=off [X86]
Exceptions:
This does not have any effect on
kvm.nx_huge_pages when
kvm.nx_huge_pages=force.
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
@ -2545,6 +2571,7 @@
be fully mitigated, even if it means losing SMT.
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@ -4703,6 +4730,71 @@
marks the TSC unconditionally unstable at bootup and
avoids any further wobbles once the TSC watchdog notices.
tsx= [X86] Control Transactional Synchronization
Extensions (TSX) feature in Intel processors that
support TSX control.
This parameter controls the TSX feature. The options are:
on - Enable TSX on the system. Although there are
mitigations for all known security vulnerabilities,
TSX has been known to be an accelerator for
several previous speculation-related CVEs, and
so there may be unknown security risks associated
with leaving it enabled.
off - Disable TSX on the system. (Note that this
option takes effect only on newer CPUs which are
not vulnerable to MDS, i.e., have
MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
the new IA32_TSX_CTRL MSR through a microcode
update. This new MSR allows for the reliable
deactivation of the TSX functionality.)
auto - Disable TSX if X86_BUG_TAA is present,
otherwise enable TSX on the system.
Not specifying this option is equivalent to tsx=off.
See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
for more details.
tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
Abort (TAA) vulnerability.
Similar to Micro-architectural Data Sampling (MDS)
certain CPUs that support Transactional
Synchronization Extensions (TSX) are vulnerable to an
exploit against CPU internal buffers which can forward
information to a disclosure gadget under certain
conditions.
In vulnerable processors, the speculatively forwarded
data can be used in a cache side channel attack, to
access data to which the attacker does not have direct
access.
This parameter controls the TAA mitigation. The
options are:
full - Enable TAA mitigation on vulnerable CPUs
if TSX is enabled.
full,nosmt - Enable TAA mitigation and disable SMT on
vulnerable CPUs. If TSX is disabled, SMT
is not disabled because CPU is not
vulnerable to cross-thread TAA attacks.
off - Unconditionally disable TAA mitigation
Not specifying this option is equivalent to
tsx_async_abort=full. On CPUs which are MDS affected
and deploy MDS mitigation, TAA mitigation is not
required and doesn't provide any additional
mitigation.
For details see:
Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
Format:
@ -5130,6 +5222,10 @@
the unplug protocol
never -- do not unplug even if version check succeeds
xen_legacy_crash [X86,XEN]
Crash from Xen panic notifier, without executing late
panic() code such as dumping handler.
xen_nopvspin [X86,XEN]
Disables the ticketlock slowpath using Xen PV
optimizations.

View file

@ -90,6 +90,51 @@ There are two ways in which a group may become throttled:
In case b) above, even though the child may have runtime remaining it will not
be allowed to until the parent's runtime is refreshed.
CFS Bandwidth Quota Caveats
---------------------------
Once a slice is assigned to a cpu it does not expire. However all but 1ms of
the slice may be returned to the global pool if all threads on that cpu become
unrunnable. This is configured at compile time by the min_cfs_rq_runtime
variable. This is a performance tweak that helps prevent added contention on
the global lock.
The fact that cpu-local slices do not expire results in some interesting corner
cases that should be understood.
For cgroup cpu constrained applications that are cpu limited this is a
relatively moot point because they will naturally consume the entirety of their
quota as well as the entirety of each cpu-local slice in each period. As a
result it is expected that nr_periods roughly equal nr_throttled, and that
cpuacct.usage will increase roughly equal to cfs_quota_us in each period.
For highly-threaded, non-cpu bound applications this non-expiration nuance
allows applications to briefly burst past their quota limits by the amount of
unused slice on each cpu that the task group is running on (typically at most
1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only
applies if quota had been assigned to a cpu and then not fully used or returned
in previous periods. This burst amount will not be transferred between cores.
As a result, this mechanism still strictly limits the task group to quota
average usage, albeit over a longer time window than a single period. This
also limits the burst ability to no more than 1ms per cpu. This provides
better more predictable user experience for highly threaded applications with
small quota limits on high core count machines. It also eliminates the
propensity to throttle these applications while simultanously using less than
quota amounts of cpu. Another way to say this, is that by allowing the unused
portion of a slice to remain valid across periods we have decreased the
possibility of wastefully expiring quota on cpu-local silos that don't need a
full slice's amount of cpu time.
The interaction between cpu-bound and non-cpu-bound-interactive applications
should also be considered, especially when single core usage hits 100%. If you
gave each of these applications half of a cpu-core and they both got scheduled
on the same CPU it is theoretically possible that the non-cpu bound application
will use up to 1ms additional quota in some periods, thereby preventing the
cpu-bound application from fully using its quota by that same amount. In these
instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to
decide which application is chosen to run, as they will both be runnable and
have remaining quota. This runtime discrepancy will be made up in the following
periods when the interactive application idles.
Examples
--------
1. Limit a group to 1 CPU worth of runtime.

View file

@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
------------
Name: kvm_lock
Type: spinlock_t
Type: mutex
Arch: any
Protects: - vm_list

View file

@ -6,3 +6,4 @@ x86 architecture specifics
:maxdepth: 1
mds
tsx_async_abort

View file

@ -0,0 +1,117 @@
.. SPDX-License-Identifier: GPL-2.0
TSX Async Abort (TAA) mitigation
================================
.. _tsx_async_abort:
Overview
--------
TSX Async Abort (TAA) is a side channel attack on internal buffers in some
Intel processors similar to Microachitectural Data Sampling (MDS). In this
case certain loads may speculatively pass invalid data to dependent operations
when an asynchronous abort condition is pending in a Transactional
Synchronization Extensions (TSX) transaction. This includes loads with no
fault or assist condition. Such loads may speculatively expose stale data from
the same uarch data structures as in MDS, with same scope of exposure i.e.
same-thread and cross-thread. This issue affects all current processors that
support TSX.
Mitigation strategy
-------------------
a) TSX disable - one of the mitigations is to disable TSX. A new MSR
IA32_TSX_CTRL will be available in future and current processors after
microcode update which can be used to disable TSX. In addition, it
controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
vulnerability. More details on this approach can be found in
:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
Kernel internal mitigation modes
--------------------------------
============= ============================================================
off Mitigation is disabled. Either the CPU is not affected or
tsx_async_abort=off is supplied on the kernel command line.
tsx disabled Mitigation is enabled. TSX feature is disabled by default at
bootup on processors that support TSX control.
verw Mitigation is enabled. CPU is affected and MD_CLEAR is
advertised in CPUID.
ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not
advertised in CPUID. That is mainly for virtualization
scenarios where the host has the updated microcode but the
hypervisor does not expose MD_CLEAR in CPUID. It's a best
effort approach without guarantee.
============= ============================================================
If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
not provided then the kernel selects an appropriate mitigation depending on the
status of RTM and MD_CLEAR CPUID bits.
Below tables indicate the impact of tsx=on|off|auto cmdline options on state of
TAA mitigation, VERW behavior and TSX feature for various combinations of
MSR_IA32_ARCH_CAPABILITIES bits.
1. "tsx=off"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off
---------------------------------- -------------------------------------------------------------------------
TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0 0 0 HW default Yes Same as MDS Same as MDS
0 0 1 Invalid case Invalid case Invalid case Invalid case
0 1 0 HW default No Need ucode update Need ucode update
0 1 1 Disabled Yes TSX disabled TSX disabled
1 X 1 Disabled X None needed None needed
========= ========= ============ ============ ============== =================== ======================
2. "tsx=on"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on
---------------------------------- -------------------------------------------------------------------------
TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0 0 0 HW default Yes Same as MDS Same as MDS
0 0 1 Invalid case Invalid case Invalid case Invalid case
0 1 0 HW default No Need ucode update Need ucode update
0 1 1 Enabled Yes None Same as MDS
1 X 1 Enabled X None needed None needed
========= ========= ============ ============ ============== =================== ======================
3. "tsx=auto"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto
---------------------------------- -------------------------------------------------------------------------
TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0 0 0 HW default Yes Same as MDS Same as MDS
0 0 1 Invalid case Invalid case Invalid case Invalid case
0 1 0 HW default No Need ucode update Need ucode update
0 1 1 Disabled Yes TSX disabled TSX disabled
1 X 1 Enabled X None needed None needed
========= ========= ============ ============ ============== =================== ======================
In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
indicates whether MSR_IA32_TSX_CTRL is supported.
There are two control bits in IA32_TSX_CTRL MSR:
Bit 0: When set it disables the Restricted Transactional Memory (RTM)
sub-feature of TSX (will force all transactions to abort on the
XBEGIN instruction).
Bit 1: When set it disables the enumeration of the RTM and HLE feature
(i.e. it will make CPUID(EAX=7).EBX{bit4} and
CPUID(EAX=7).EBX{bit11} read as 0).

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 81
SUBLEVEL = 84
EXTRAVERSION =
NAME = "People's Front"
@ -926,6 +926,12 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# change __FILE__ to the relative path from the srctree
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
# ensure -fcf-protection is disabled when using retpoline as it is
# incompatible with -mindirect-branch=thunk-extern
ifdef CONFIG_RETPOLINE
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
# use the deterministic mode of AR if available
KBUILD_ARFLAGS := $(call ar-option,D)

View file

@ -443,7 +443,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302d0000 0x10000>;
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_CLK_DUMMY>,
clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
<&clks IMX7D_GPT1_ROOT_CLK>;
clock-names = "ipg", "per";
};
@ -452,7 +452,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302e0000 0x10000>;
interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_CLK_DUMMY>,
clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
<&clks IMX7D_GPT2_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";
@ -462,7 +462,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302f0000 0x10000>;
interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_CLK_DUMMY>,
clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
<&clks IMX7D_GPT3_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";
@ -472,7 +472,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x30300000 0x10000>;
interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX7D_CLK_DUMMY>,
clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
<&clks IMX7D_GPT4_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";

View file

@ -196,3 +196,7 @@
&twl_gpio {
ti,use-leds;
};
&twl_keypad {
status = "disabled";
};

View file

@ -72,7 +72,7 @@ ENDPROC(__vet_atags)
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
* r0 = cp#15 control register
* r0 = cp#15 control register (exc_ret for M-class)
* r1 = machine ID
* r2 = atags/dtb pointer
* r9 = processor ID
@ -141,7 +141,8 @@ __mmap_switched_data:
#ifdef CONFIG_CPU_CP15
.long cr_alignment @ r3
#else
.long 0 @ r3
M_CLASS(.long exc_ret) @ r3
AR_CLASS(.long 0) @ r3
#endif
.size __mmap_switched_data, . - __mmap_switched_data

View file

@ -205,6 +205,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1])
bic r0, r0, #V7M_SCB_CCR_IC
#endif
str r0, [r12, V7M_SCB_CCR]
/* Pass exc_ret to __mmap_switched */
mov r0, r10
#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
ret lr
ENDPROC(__after_proc_init)

View file

@ -458,8 +458,8 @@ static s8 dm365_queue_priority_mapping[][2] = {
};
static const struct dma_slave_map dm365_edma_map[] = {
{ "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) },
{ "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) },
{ "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) },
{ "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) },
{ "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) },
{ "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) },
{ "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) },

View file

@ -478,14 +478,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu)
static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster)
{
u32 reg;
int gating_bit = cpu;
pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu);
if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
return -EINVAL;
if (is_a83t && cpu == 0)
gating_bit = 4;
/* gate processor power */
reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
reg |= PRCM_PWROFF_GATING_REG_CORE(cpu);
reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit);
writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
udelay(20);

View file

@ -768,6 +768,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
return NULL;
}
static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst)
{
u32 instr = 0;
int fault;
if (user_mode(regs))
fault = get_user(instr, ip);
else
fault = probe_kernel_address(ip, instr);
*inst = __mem_to_opcode_arm(instr);
return fault;
}
static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
{
u16 instr = 0;
int fault;
if (user_mode(regs))
fault = get_user(instr, ip);
else
fault = probe_kernel_address(ip, instr);
*inst = __mem_to_opcode_thumb16(instr);
return fault;
}
static int
do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
@ -775,10 +805,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
unsigned long instr = 0, instrptr;
int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
unsigned int type;
unsigned int fault;
u16 tinstr = 0;
int isize = 4;
int thumb2_32b = 0;
int fault;
if (interrupts_enabled(regs))
local_irq_enable();
@ -787,15 +817,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (thumb_mode(regs)) {
u16 *ptr = (u16 *)(instrptr & ~1);
fault = probe_kernel_address(ptr, tinstr);
tinstr = __mem_to_opcode_thumb16(tinstr);
fault = alignment_get_thumb(regs, ptr, &tinstr);
if (!fault) {
if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
IS_T32(tinstr)) {
/* Thumb-2 32-bit */
u16 tinst2 = 0;
fault = probe_kernel_address(ptr + 1, tinst2);
tinst2 = __mem_to_opcode_thumb16(tinst2);
u16 tinst2;
fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
instr = __opcode_thumb32_compose(tinstr, tinst2);
thumb2_32b = 1;
} else {
@ -804,8 +833,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
}
}
} else {
fault = probe_kernel_address((void *)instrptr, instr);
instr = __mem_to_opcode_arm(instr);
fault = alignment_get_arm(regs, (void *)instrptr, &instr);
}
if (fault) {

View file

@ -135,13 +135,11 @@ __v7m_setup_cont:
dsb
mov r6, lr @ save LR
ldr sp, =init_thread_union + THREAD_START_SP
stmia sp, {r0-r3, r12}
cpsie i
svc #0
1: cpsid i
ldr r0, =exc_ret
orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK
str lr, [r0]
/* Calculate exc_ret */
orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK
ldmia sp, {r0-r3, r12}
str r5, [r12, #11 * 4] @ restore the original SVC vector entry
mov lr, r6 @ restore LR

View file

@ -63,3 +63,12 @@
reg = <1>;
};
};
&reg_dc1sw {
/*
* Ethernet PHY needs 30ms to properly power up and some more
* to initialize. 100ms should be plenty of time to finish
* whole process.
*/
regulator-enable-ramp-delay = <100000>;
};

View file

@ -113,6 +113,12 @@
};
&reg_dc1sw {
/*
* Ethernet PHY needs 30ms to properly power up and some more
* to initialize. 100ms should be plenty of time to finish
* whole process.
*/
regulator-enable-ramp-delay = <100000>;
regulator-name = "vcc-phy";
};

View file

@ -42,13 +42,14 @@
pinmux: pinmux@14029c {
compatible = "pinctrl-single";
reg = <0x0014029c 0x250>;
reg = <0x0014029c 0x26c>;
#address-cells = <1>;
#size-cells = <1>;
pinctrl-single,register-width = <32>;
pinctrl-single,function-mask = <0xf>;
pinctrl-single,gpio-range = <
&range 0 154 MODE_GPIO
&range 0 91 MODE_GPIO
&range 95 60 MODE_GPIO
>;
range: gpio-range {
#pinctrl-single,gpio-range-cells = <3>;

View file

@ -463,8 +463,7 @@
<&pinmux 108 16 27>,
<&pinmux 135 77 6>,
<&pinmux 141 67 4>,
<&pinmux 145 149 6>,
<&pinmux 151 91 4>;
<&pinmux 145 149 6>;
};
i2c1: i2c@e0000 {

View file

@ -21,7 +21,7 @@
*/
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
gic_its: gic-its@18200000 {
gic_its: gic-its@1820000 {
compatible = "arm,gic-v3-its";
reg = <0x01820000 0x10000>;
msi-controller;

View file

@ -68,6 +68,7 @@
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_IMP_NVIDIA 0x4E
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@ -100,6 +101,8 @@
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
#define HISI_CPU_PART_TSV110 0xD01
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@ -120,6 +123,7 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_KRYO2XX_GOLD \
MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, ARM_CPU_PART_KRYO2XX_GOLD)
#define MIDR_KRYO2XX_SILVER \

View file

@ -43,11 +43,11 @@
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@ -91,8 +91,9 @@
#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)

View file

@ -303,23 +303,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, pte);
}
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
pteval_t lhs, rhs;
lhs = pte_val(pte_a);
rhs = pte_val(pte_b);
if (pte_present(pte_a))
lhs &= ~PTE_RDONLY;
if (pte_present(pte_b))
rhs &= ~PTE_RDONLY;
return (lhs == rhs);
}
/*
* Huge pte definitions.
*/

View file

@ -177,6 +177,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops)
struct insn_emulation *insn;
insn = kzalloc(sizeof(*insn), GFP_KERNEL);
if (!insn)
return;
insn->ops = ops;
insn->min = INSN_UNDEF;
@ -236,6 +239,8 @@ static void __init register_insn_emulation_sysctl(void)
insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
GFP_KERNEL);
if (!insns_sysctl)
return;
raw_spin_lock_irqsave(&insn_emulation_lock, flags);
list_for_each_entry(insn, &insn_emulation, node) {

View file

@ -906,6 +906,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
{ /* sentinel */ }
};
char const *str = "kpti command line option";

View file

@ -119,10 +119,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
/*
* Ensure updated trampoline is visible to instruction
* fetch before we patch in the branch.
* fetch before we patch in the branch. Although the
* architecture doesn't require an IPI in this case,
* Neoverse-N1 erratum #1542419 does require one
* if the TLB maintenance in module_enable_ro() is
* skipped due to rodata_enabled. It doesn't seem worth
* it to make it conditional given that this is
* certainly not a fast-path.
*/
__flush_icache_range((unsigned long)&dst[0],
(unsigned long)&dst[1]);
flush_icache_range((unsigned long)&dst[0],
(unsigned long)&dst[1]);
}
addr = (unsigned long)dst;
#else /* CONFIG_ARM64_MODULE_PLTS */

View file

@ -84,7 +84,7 @@ void __init prom_init(void)
* Here we will start up CPU1 in the background and ask it to
* reconfigure itself then go back to sleep.
*/
memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20);
memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20);
__sync();
set_c0_cause(C_SW0);
cpumask_set_cpu(1, &bmips_booted_mask);

View file

@ -43,7 +43,7 @@
/* O32 stack has to be 8-byte aligned. */
static u64 o32_stk[4096];
#define O32_STK &o32_stk[sizeof(o32_stk)]
#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)])
#define __PROM_O32(fun, arg) fun arg __asm__(#fun); \
__asm__(#fun " = call_o32")

View file

@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void)
#endif
}
extern char bmips_reset_nmi_vec;
extern char bmips_reset_nmi_vec_end;
extern char bmips_smp_movevec;
extern char bmips_smp_int_vec;
extern char bmips_smp_int_vec_end;
extern char bmips_reset_nmi_vec[];
extern char bmips_reset_nmi_vec_end[];
extern char bmips_smp_movevec[];
extern char bmips_smp_int_vec[];
extern char bmips_smp_int_vec_end[];
extern int bmips_smp_enabled;
extern int bmips_cpu_offset;

View file

@ -73,8 +73,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
extern unsigned long __xchg_small(volatile void *ptr, unsigned long val,
unsigned int size);
static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
int size)
static __always_inline
unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
{
switch (size) {
case 1:
@ -146,8 +146,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
unsigned long new, unsigned int size);
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, unsigned int size)
static __always_inline
unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, unsigned int size)
{
switch (size) {
case 1:

View file

@ -457,10 +457,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end)
static inline void bmips_nmi_handler_setup(void)
{
bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec,
&bmips_reset_nmi_vec_end);
bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec,
&bmips_smp_int_vec_end);
bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec,
bmips_reset_nmi_vec_end);
bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec,
bmips_smp_int_vec_end);
}
struct reset_vec_info {

View file

@ -214,6 +214,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
#ifndef __ASSEMBLY__
@ -460,7 +461,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \

View file

@ -717,6 +717,8 @@ static __init void update_tlbie_feature_flag(unsigned long pvr)
WARN_ONCE(1, "Unknown PVR");
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
}
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
}
}

View file

@ -434,6 +434,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */
unsigned long rb,rs,prs,r,ric;
rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLSUH_TLB */
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rb_value), "r" (lpid));
}
}
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
@ -452,16 +483,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[0]), "r" (kvm->arch.lpid));
}
fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} else {
if (need_sync)

View file

@ -201,8 +201,31 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
return va;
}
static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
int apsize, int ssize)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */
unsigned long rb,rs,prs,r,ric;
rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLSUH_TLB */
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Need the extra ptesync to ensure we don't reorder tlbie*/
asm volatile("ptesync": : :"memory");
@ -287,7 +310,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
} else {
__tlbie(vpn, psize, apsize, ssize);
fixup_tlbie(vpn, psize, apsize, ssize);
fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@ -860,7 +883,7 @@ static void native_flush_hash_range(unsigned long number, int local)
/*
* Just do one more with the last used values.
*/
fixup_tlbie(vpn, psize, psize, ssize);
fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)

View file

@ -215,21 +215,82 @@ static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
static inline void fixup_tlbie(void)
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
unsigned long ap)
{
unsigned long pid = 0;
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_pid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
* We can use any address for the invalidation, pick one which is
* probably unused as an optimisation.
*/
unsigned long va = ((1UL << 52) - 1);
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_pid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
/*
* We can use any address for the invalidation, pick one which is
* probably unused as an optimisation.
*/
unsigned long va = ((1UL << 52) - 1);
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
@ -277,6 +338,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
@ -284,8 +346,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
fixup_tlbie_pid(pid);
}
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@ -329,6 +391,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@ -336,8 +399,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
fixup_tlbie_lpid(lpid);
}
fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@ -410,6 +473,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@ -419,7 +484,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
fixup_tlbie();
fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@ -430,7 +495,7 @@ static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
fixup_tlbie_lpid(lpid);
fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@ -442,7 +507,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@ -773,7 +837,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (gflush)
__tlbie_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}

View file

@ -70,6 +70,7 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
return 0;
}
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
u64 end_pfn = start_pfn + nr_pages - 1;
@ -110,6 +111,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
/* Trace memory needs to be aligned to the size */
end_pfn = round_down(end_pfn - nr_pages, nr_pages);
lock_device_hotplug();
for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
/*
@ -118,7 +120,6 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
* we never try to remove memory that spans two iomem
* resources.
*/
lock_device_hotplug();
end_pfn = base_pfn + nr_pages;
for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
remove_memory(nid, pfn << PAGE_SHIFT, bytes);
@ -127,6 +128,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
return base_pfn << PAGE_SHIFT;
}
}
unlock_device_hotplug();
return 0;
}

View file

@ -150,20 +150,25 @@ static int pnv_smp_cpu_disable(void)
return 0;
}
static void pnv_flush_interrupts(void)
{
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (xive_enabled())
xive_flush_interrupt();
else
icp_opal_flush_interrupt();
} else {
icp_native_flush_interrupt();
}
}
static void pnv_smp_cpu_kill_self(void)
{
unsigned long srr1, unexpected_mask, wmask;
unsigned int cpu;
unsigned long srr1, wmask;
u64 lpcr_val;
/* Standard hot unplug procedure */
/*
* This hard disables local interurpts, ensuring we have no lazy
* irqs pending.
*/
WARN_ON(irqs_disabled());
hard_irq_disable();
WARN_ON(lazy_irq_pending());
idle_task_exit();
current->active_mm = NULL; /* for sanity */
@ -176,6 +181,27 @@ static void pnv_smp_cpu_kill_self(void)
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
/*
* This turns the irq soft-disabled state we're called with, into a
* hard-disabled state with pending irq_happened interrupts cleared.
*
* PACA_IRQ_DEC - Decrementer should be ignored.
* PACA_IRQ_HMI - Can be ignored, processing is done in real mode.
* PACA_IRQ_DBELL, EE, PMI - Unexpected.
*/
hard_irq_disable();
if (generic_check_cpu_restart(cpu))
goto out;
unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
if (local_paca->irq_happened & unexpected_mask) {
if (local_paca->irq_happened & PACA_IRQ_EE)
pnv_flush_interrupts();
DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
cpu, local_paca->irq_happened);
}
local_paca->irq_happened = PACA_IRQ_HARD_DIS;
/*
* We don't want to take decrementer interrupts while we are
* offline, so clear LPCR:PECE1. We keep PECE2 (and
@ -201,6 +227,7 @@ static void pnv_smp_cpu_kill_self(void)
srr1 = pnv_cpu_offline(cpu);
WARN_ON_ONCE(!irqs_disabled());
WARN_ON(lazy_irq_pending());
/*
@ -216,13 +243,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
if (((srr1 & wmask) == SRR1_WAKEEE) ||
((srr1 & wmask) == SRR1_WAKEHVI)) {
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (xive_enabled())
xive_flush_interrupt();
else
icp_opal_flush_interrupt();
} else
icp_native_flush_interrupt();
pnv_flush_interrupts();
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
@ -270,7 +291,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
out:
DBG("CPU%d coming online...\n", cpu);
}

View file

@ -84,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
__rc; \
})
static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
unsigned long spec = 0x010000UL;
int rc;
@ -114,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
return rc;
}
static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
unsigned long spec = 0x01UL;
int rc;

View file

@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf)
{
unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int seq;
do {
now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_time = READ_ONCE(idle->idle_time);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
in_idle = 0;
now = get_tod_clock();
if (idle_enter) {
if (idle_exit) {
in_idle = idle_exit - idle_enter;
} else if (now > idle_enter) {
in_idle = now - idle_enter;
}
}
idle_time += in_idle;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
u64 arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
unsigned long long now, idle_enter, idle_exit;
unsigned long long now, idle_enter, idle_exit, in_idle;
unsigned int seq;
do {
now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
in_idle = 0;
now = get_tod_clock();
if (idle_enter) {
if (idle_exit) {
in_idle = idle_exit - idle_enter;
} else if (now > idle_enter) {
in_idle = now - idle_enter;
}
}
return cputime_to_nsecs(in_idle);
}
void arch_cpu_idle_enter(void)

View file

@ -2110,13 +2110,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);

View file

@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
}
if (write) {
len = *lenp;
if (copy_from_user(buf, buffer,
len > sizeof(buf) ? sizeof(buf) : len))
len = min(*lenp, sizeof(buf));
if (copy_from_user(buf, buffer, len))
return -EFAULT;
buf[sizeof(buf) - 1] = '\0';
buf[len - 1] = '\0';
cmm_skip_blanks(buf, &p);
nr = simple_strtoul(p, &p, 0);
cmm_skip_blanks(p, &p);
seconds = simple_strtoul(p, &p, 0);
cmm_set_timeout(nr, seconds);
*ppos += *lenp;
} else {
len = sprintf(buf, "%ld %ld\n",
cmm_timeout_pages, cmm_timeout_seconds);
@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
len = *lenp;
if (copy_to_user(buffer, buf, len))
return -EFAULT;
*lenp = len;
*ppos += len;
}
*lenp = len;
*ppos += len;
return 0;
}

View file

@ -1904,6 +1904,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
If unsure, say y.
choice
prompt "TSX enable mode"
depends on CPU_SUP_INTEL
default X86_INTEL_TSX_MODE_OFF
help
Intel's TSX (Transactional Synchronization Extensions) feature
allows to optimize locking protocols through lock elision which
can lead to a noticeable performance boost.
On the other hand it has been shown that TSX can be exploited
to form side channel attacks (e.g. TAA) and chances are there
will be more of those attacks discovered in the future.
Therefore TSX is not enabled by default (aka tsx=off). An admin
might override this decision by tsx=on the command line parameter.
Even with TSX enabled, the kernel will attempt to enable the best
possible TAA mitigation setting depending on the microcode available
for the particular machine.
This option allows to set the default tsx mode between tsx=on, =off
and =auto. See Documentation/admin-guide/kernel-parameters.txt for more
details.
Say off if not sure, auto if TSX is in use but it should be used on safe
platforms or on if TSX is in use and the security aspect of tsx is not
relevant.
config X86_INTEL_TSX_MODE_OFF
bool "off"
help
TSX is disabled if possible - equals to tsx=off command line parameter.
config X86_INTEL_TSX_MODE_ON
bool "on"
help
TSX is always enabled on TSX capable HW - equals the tsx=on command
line parameter.
config X86_INTEL_TSX_MODE_AUTO
bool "auto"
help
TSX is enabled on TSX capable HW that is believed to be safe against
side channel attacks- equals the tsx=auto command line parameter.
endchoice
config EFI
bool "EFI runtime service support"
depends on ACPI

View file

@ -4,12 +4,14 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/nmi.h>
#include "../perf_event.h"
static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@ -640,11 +642,12 @@ static void amd_pmu_disable_event(struct perf_event *event)
* handler when multiple PMCs are active or PMC overflow while handling some
* other source of an NMI.
*
* Attempt to mitigate this by using the number of active PMCs to determine
* whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
* any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
* number of active PMCs or 2. The value of 2 is used in case an NMI does not
* arrive at the LAPIC in time to be collapsed into an already pending NMI.
* Attempt to mitigate this by creating an NMI window in which un-handled NMIs
* received during this window will be claimed. This prevents extending the
* window past when it is possible that latent NMIs should be received. The
* per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
* handled a counter. When an un-handled NMI is received, it will be claimed
* only if arriving within that window.
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
@ -662,21 +665,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
handled = x86_pmu_handle_irq(regs);
/*
* If a counter was handled, record the number of possible remaining
* NMIs that can occur.
* If a counter was handled, record a timestamp such that un-handled
* NMIs will be claimed if arriving within that window.
*/
if (handled) {
this_cpu_write(perf_nmi_counter,
min_t(unsigned int, 2, active));
this_cpu_write(perf_nmi_tstamp,
jiffies + perf_nmi_window);
return handled;
}
if (!this_cpu_read(perf_nmi_counter))
if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
return NMI_DONE;
this_cpu_dec(perf_nmi_counter);
return NMI_HANDLED;
}
@ -908,6 +909,9 @@ static int __init amd_core_pmu_init(void)
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
/* Avoid calulating the value each time in the NMI handler */
perf_nmi_window = msecs_to_jiffies(100);
switch (boot_cpu_data.x86) {
case 0x15:
pr_cont("Fam15h ");

View file

@ -389,7 +389,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
config &= ~perf_ibs->cnt_mask;
wrmsrl(hwc->config_base, config);
if (boot_cpu_data.x86 == 0x10)
wrmsrl(hwc->config_base, config);
config &= ~perf_ibs->enable_mask;
wrmsrl(hwc->config_base, config);
}
@ -564,7 +565,8 @@ static struct perf_ibs perf_ibs_op = {
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
.cnt_mask = IBS_OP_MAX_CNT,
.cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
IBS_OP_CUR_CNT_RAND,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
.max_period = IBS_OP_MAX_CNT << 4,
@ -625,7 +627,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (event->attr.sample_type & PERF_SAMPLE_RAW)
offset_max = perf_ibs->offset_max;
else if (check_rip)
offset_max = 2;
offset_max = 3;
else
offset_max = 1;
do {

View file

@ -485,10 +485,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags)
local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
uncore_enable_event(box, event);
if (box->n_active == 1) {
uncore_enable_box(box);
if (box->n_active == 1)
uncore_pmu_start_hrtimer(box);
}
}
void uncore_pmu_event_stop(struct perf_event *event, int flags)
@ -512,10 +510,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags)
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
if (box->n_active == 0) {
uncore_disable_box(box);
if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
}
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
@ -769,6 +765,40 @@ static int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
static void uncore_pmu_enable(struct pmu *pmu)
{
struct intel_uncore_pmu *uncore_pmu;
struct intel_uncore_box *box;
uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
if (!uncore_pmu)
return;
box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
if (!box)
return;
if (uncore_pmu->type->ops->enable_box)
uncore_pmu->type->ops->enable_box(box);
}
static void uncore_pmu_disable(struct pmu *pmu)
{
struct intel_uncore_pmu *uncore_pmu;
struct intel_uncore_box *box;
uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
if (!uncore_pmu)
return;
box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
if (!box)
return;
if (uncore_pmu->type->ops->disable_box)
uncore_pmu->type->ops->disable_box(box);
}
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
@ -794,6 +824,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = uncore_pmu_enable,
.pmu_disable = uncore_pmu_disable,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,

View file

@ -412,18 +412,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
return -EINVAL;
}
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
box->pmu->type->ops->disable_box(box);
}
static inline void uncore_enable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->enable_box)
box->pmu->type->ops->enable_box(box);
}
static inline void uncore_disable_event(struct intel_uncore_box *box,
struct perf_event *event)
{

View file

@ -389,5 +389,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */

View file

@ -6,7 +6,7 @@
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
* The "_X" parts are generally the EP and EX Xeons, or the
* "Extreme" ones, like Broadwell-E.
* "Extreme" ones, like Broadwell-E, or Atom microserver.
*
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
@ -61,6 +61,9 @@
#define INTEL_FAM6_TIGERLAKE_L 0x8C
#define INTEL_FAM6_TIGERLAKE 0x8D
#define INTEL_FAM6_COMETLAKE 0xA5
#define INTEL_FAM6_COMETLAKE_L 0xA6
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@ -80,6 +83,7 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
/* Xeon Phi */

View file

@ -281,6 +281,7 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
struct list_head lpage_disallowed_link;
/*
* The following two entries are used to key the shadow page in the
@ -293,6 +294,7 @@ struct kvm_mmu_page {
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
bool unsync;
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
@ -807,6 +809,7 @@ struct kvm_arch {
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
@ -877,6 +880,8 @@ struct kvm_arch {
bool x2apic_broadcast_quirk_disabled;
bool guest_can_read_msr_platform_info;
struct task_struct *nx_lpage_recovery_thread;
};
struct kvm_vm_stat {
@ -890,6 +895,7 @@ struct kvm_vm_stat {
ulong mmu_unsync;
ulong remote_tlb_flush;
ulong lpages;
ulong nx_lpage_splits;
ulong max_mmu_page_hash_collisions;
};

View file

@ -84,6 +84,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
* The processor is not susceptible to a
* machine check error due to modifying the
* code page size along with either the
* physical address or cache type
* without TLB invalidation.
*/
#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
#define ARCH_CAP_TAA_NO BIT(8) /*
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@ -94,6 +106,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
#define MSR_IA32_TSX_CTRL 0x00000122
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176

View file

@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>
/**
* mds_clear_cpu_buffers - Mitigation for MDS vulnerability
* mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers(void)
}
/**
* mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
* mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/

View file

@ -1003,4 +1003,11 @@ enum mds_mitigations {
MDS_MITIGATION_VMWERV,
};
enum taa_mitigations {
TAA_MITIGATION_OFF,
TAA_MITIGATION_UCODE_NEEDED,
TAA_MITIGATION_VERW,
TAA_MITIGATION_TSX_DISABLED,
};
#endif /* _ASM_X86_PROCESSOR_H */

View file

@ -1528,9 +1528,6 @@ static void setup_local_APIC(void)
{
int cpu = smp_processor_id();
unsigned int value;
#ifdef CONFIG_X86_32
int logical_apicid, ldr_apicid;
#endif
if (disable_apic) {
@ -1571,16 +1568,21 @@ static void setup_local_APIC(void)
apic->init_apic_ldr();
#ifdef CONFIG_X86_32
/*
* APIC LDR is initialized. If logical_apicid mapping was
* initialized during get_smp_config(), make sure it matches the
* actual value.
*/
logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
/* always use the value from LDR */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
if (apic->dest_logical) {
int logical_apicid, ldr_apicid;
/*
* APIC LDR is initialized. If logical_apicid mapping was
* initialized during get_smp_config(), make sure it matches
* the actual value.
*/
logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
if (logical_apicid != BAD_APICID)
WARN_ON(logical_apicid != ldr_apicid);
/* Always use the value from LDR. */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
}
#endif
/*

View file

@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o tsx.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o

View file

@ -32,11 +32,14 @@
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
#include "cpu.h"
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
static void __init taa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@ -103,6 +106,7 @@ void __init check_bugs(void)
ssb_select_mitigation();
l1tf_select_mitigation();
mds_select_mitigation();
taa_select_mitigation();
arch_smt_update();
@ -266,6 +270,100 @@ static int __init mds_cmdline(char *str)
}
early_param("mds", mds_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "TAA: " fmt
/* Default mitigation for TAA-affected CPUs */
static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
static bool taa_nosmt __ro_after_init;
static const char * const taa_strings[] = {
[TAA_MITIGATION_OFF] = "Vulnerable",
[TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
[TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
[TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled",
};
static void __init taa_select_mitigation(void)
{
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
}
/* TSX previously disabled by tsx=off */
if (!boot_cpu_has(X86_FEATURE_RTM)) {
taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
goto out;
}
if (cpu_mitigations_off()) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
}
/* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
if (taa_mitigation == TAA_MITIGATION_OFF)
goto out;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
taa_mitigation = TAA_MITIGATION_VERW;
else
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
* VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
* A microcode update fixes this behavior to clear CPU buffers. It also
* adds support for MSR_IA32_TSX_CTRL which is enumerated by the
* ARCH_CAP_TSX_CTRL_MSR bit.
*
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
ia32_cap = x86_read_arch_cap_msr();
if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
* TSX is enabled, select alternate mitigation for TAA which is
* the same as MDS. Enable MDS static branch to clear CPU buffers.
*
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
static_branch_enable(&mds_user_clear);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
out:
pr_info("%s\n", taa_strings[taa_mitigation]);
}
static int __init tsx_async_abort_parse_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_TAA))
return 0;
if (!str)
return -EINVAL;
if (!strcmp(str, "off")) {
taa_mitigation = TAA_MITIGATION_OFF;
} else if (!strcmp(str, "full")) {
taa_mitigation = TAA_MITIGATION_VERW;
} else if (!strcmp(str, "full,nosmt")) {
taa_mitigation = TAA_MITIGATION_VERW;
taa_nosmt = true;
}
return 0;
}
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
@ -772,13 +870,10 @@ static void update_mds_branch_idle(void)
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
void arch_smt_update(void)
{
/* Enhanced IBRS implies STIBP. No update required. */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
mutex_lock(&spec_ctrl_mutex);
switch (spectre_v2_user) {
@ -804,6 +899,17 @@ void arch_smt_update(void)
break;
}
switch (taa_mitigation) {
case TAA_MITIGATION_VERW:
case TAA_MITIGATION_UCODE_NEEDED:
if (sched_smt_active())
pr_warn_once(TAA_MSG_SMT);
break;
case TAA_MITIGATION_TSX_DISABLED:
case TAA_MITIGATION_OFF:
break;
}
mutex_unlock(&spec_ctrl_mutex);
}
@ -1119,6 +1225,9 @@ void x86_spec_ctrl_setup_ap(void)
x86_amd_ssb_disable();
}
bool itlb_multihit_kvm_mitigation;
EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
#undef pr_fmt
#define pr_fmt(fmt) "L1TF: " fmt
@ -1274,11 +1383,24 @@ static ssize_t l1tf_show_state(char *buf)
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t itlb_multihit_show_state(char *buf)
{
if (itlb_multihit_kvm_mitigation)
return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
else
return sprintf(buf, "KVM: Vulnerable\n");
}
#else
static ssize_t l1tf_show_state(char *buf)
{
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
}
static ssize_t itlb_multihit_show_state(char *buf)
{
return sprintf(buf, "Processor vulnerable\n");
}
#endif
static ssize_t mds_show_state(char *buf)
@ -1298,6 +1420,21 @@ static ssize_t mds_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t tsx_async_abort_show_state(char *buf)
{
if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
(taa_mitigation == TAA_MITIGATION_OFF))
return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
taa_strings[taa_mitigation]);
}
return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static char *stibp_state(void)
{
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@ -1366,6 +1503,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MDS:
return mds_show_state(buf);
case X86_BUG_TAA:
return tsx_async_abort_show_state(buf);
case X86_BUG_ITLB_MULTIHIT:
return itlb_multihit_show_state(buf);
default:
break;
}
@ -1402,4 +1545,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
}
ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
}
ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
}
#endif

View file

@ -946,13 +946,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
#define NO_SPECULATION BIT(0)
#define NO_MELTDOWN BIT(1)
#define NO_SSB BIT(2)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
#define NO_SWAPGS BIT(6)
#define NO_SPECULATION BIT(0)
#define NO_MELTDOWN BIT(1)
#define NO_SSB BIT(2)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
#define NO_SWAPGS BIT(6)
#define NO_ITLB_MULTIHIT BIT(7)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@ -970,26 +971,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS),
VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
/*
* Technically, swapgs isn't serializing on AMD (despite it previously
@ -999,14 +1000,16 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
* good enough for our purposes.
*/
VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT),
/* AMD Family 0xf - 0x12 */
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
{}
};
@ -1017,19 +1020,30 @@ static bool __init cpu_matches(unsigned long which)
return m && !!(m->driver_data & which);
}
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
u64 x86_read_arch_cap_msr(void)
{
u64 ia32_cap = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
return ia32_cap;
}
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@ -1046,6 +1060,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (!cpu_matches(NO_SWAPGS))
setup_force_cpu_bug(X86_BUG_SWAPGS);
/*
* When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
* - TSX is supported or
* - TSX_CTRL is present
*
* TSX_CTRL check is needed for cases when TSX could be disabled before
* the kernel boot e.g. kexec.
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
if (cpu_matches(NO_MELTDOWN))
return;
@ -1475,6 +1504,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu();
#endif
cpu_detect_tlb(&boot_cpu_data);
tsx_init();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)

View file

@ -45,6 +45,22 @@ struct _tlb_table {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
#ifdef CONFIG_CPU_SUP_INTEL
enum tsx_ctrl_states {
TSX_CTRL_ENABLE,
TSX_CTRL_DISABLE,
TSX_CTRL_NOT_SUPPORTED,
};
extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
extern void __init tsx_init(void);
extern void tsx_enable(void);
extern void tsx_disable(void);
#else
static inline void tsx_init(void) { }
#endif /* CONFIG_CPU_SUP_INTEL */
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
@ -65,4 +81,6 @@ unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
extern u64 x86_read_arch_cap_msr(void);
#endif /* ARCH_X86_CPU_H */

View file

@ -766,6 +766,11 @@ static void init_intel(struct cpuinfo_x86 *c)
init_intel_energy_perf(c);
init_intel_misc_features(c);
if (tsx_ctrl_state == TSX_CTRL_ENABLE)
tsx_enable();
if (tsx_ctrl_state == TSX_CTRL_DISABLE)
tsx_disable();
}
#ifdef CONFIG_X86_32

140
arch/x86/kernel/cpu/tsx.c Normal file
View file

@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Intel Transactional Synchronization Extensions (TSX) control.
*
* Copyright (C) 2019 Intel Corporation
*
* Author:
* Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
*/
#include <linux/cpufeature.h>
#include <asm/cmdline.h>
#include "cpu.h"
enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
void tsx_disable(void)
{
u64 tsx;
rdmsrl(MSR_IA32_TSX_CTRL, tsx);
/* Force all transactions to immediately abort */
tsx |= TSX_CTRL_RTM_DISABLE;
/*
* Ensure TSX support is not enumerated in CPUID.
* This is visible to userspace and will ensure they
* do not waste resources trying TSX transactions that
* will always abort.
*/
tsx |= TSX_CTRL_CPUID_CLEAR;
wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}
void tsx_enable(void)
{
u64 tsx;
rdmsrl(MSR_IA32_TSX_CTRL, tsx);
/* Enable the RTM feature in the cpu */
tsx &= ~TSX_CTRL_RTM_DISABLE;
/*
* Ensure TSX support is enumerated in CPUID.
* This is visible to userspace and will ensure they
* can enumerate and use the TSX feature.
*/
tsx &= ~TSX_CTRL_CPUID_CLEAR;
wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}
static bool __init tsx_ctrl_is_supported(void)
{
u64 ia32_cap = x86_read_arch_cap_msr();
/*
* TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
* MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
*
* TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
* microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
* bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
* MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
* tsx= cmdline requests will do nothing on CPUs without
* MSR_IA32_TSX_CTRL support.
*/
return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
}
static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
{
if (boot_cpu_has_bug(X86_BUG_TAA))
return TSX_CTRL_DISABLE;
return TSX_CTRL_ENABLE;
}
void __init tsx_init(void)
{
char arg[5] = {};
int ret;
if (!tsx_ctrl_is_supported())
return;
ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
if (ret >= 0) {
if (!strcmp(arg, "on")) {
tsx_ctrl_state = TSX_CTRL_ENABLE;
} else if (!strcmp(arg, "off")) {
tsx_ctrl_state = TSX_CTRL_DISABLE;
} else if (!strcmp(arg, "auto")) {
tsx_ctrl_state = x86_get_tsx_auto_mode();
} else {
tsx_ctrl_state = TSX_CTRL_DISABLE;
pr_err("tsx: invalid option, defaulting to off\n");
}
} else {
/* tsx= not provided */
if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
tsx_ctrl_state = x86_get_tsx_auto_mode();
else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
tsx_ctrl_state = TSX_CTRL_DISABLE;
else
tsx_ctrl_state = TSX_CTRL_ENABLE;
}
if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
tsx_disable();
/*
* tsx_disable() will change the state of the
* RTM CPUID bit. Clear it here since it is now
* expected to be not set.
*/
setup_clear_cpu_cap(X86_FEATURE_RTM);
} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
/*
* HW defaults TSX to be enabled at bootup.
* We may still need the TSX enable support
* during init for special cases like
* kexec after TSX is disabled.
*/
tsx_enable();
/*
* tsx_enable() will change the state of the
* RTM CPUID bit. Force it here since it is now
* expected to be set.
*/
setup_force_cpu_cap(X86_FEATURE_RTM);
}
}

View file

@ -501,8 +501,16 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
if (boot_cpu_has(X86_FEATURE_IBPB) &&
boot_cpu_has(X86_FEATURE_IBRS))
entry->edx |= F(SPEC_CTRL);
if (boot_cpu_has(X86_FEATURE_STIBP))
entry->edx |= F(INTEL_STIBP);
if (boot_cpu_has(X86_FEATURE_SSBD))
entry->edx |= F(SPEC_CTRL_SSBD);
/*
* We emulate ARCH_CAPABILITIES in software even
* if the host doesn't support it.

View file

@ -40,6 +40,7 @@
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/kern_levels.h>
#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/pat.h>
@ -49,6 +50,30 @@
#include <asm/kvm_page_track.h>
#include "trace.h"
extern bool itlb_multihit_kvm_mitigation;
static int __read_mostly nx_huge_pages = -1;
static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
static struct kernel_param_ops nx_huge_pages_ops = {
.set = set_nx_huge_pages,
.get = param_get_bool,
};
static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
.set = set_nx_huge_pages_recovery_ratio,
.get = param_get_uint,
};
module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
__MODULE_PARM_TYPE(nx_huge_pages, "bool");
module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
&nx_huge_pages_recovery_ratio, 0644);
__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
/*
* When setting this variable to true it enables Two-Dimensional-Paging
* where the hardware walks 2 page tables:
@ -140,9 +165,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@ -261,9 +283,14 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
static void mmu_spte_set(u64 *sptep, u64 spte);
static bool is_executable_pte(u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
BUG_ON((mmio_mask & mmio_value) != mmio_value);
@ -283,6 +310,11 @@ static inline bool spte_ad_enabled(u64 spte)
return !(spte & shadow_acc_track_value);
}
static bool is_nx_huge_page_enabled(void)
{
return READ_ONCE(nx_huge_pages);
}
static inline u64 spte_shadow_accessed_mask(u64 spte)
{
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
@ -1027,10 +1059,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
if (sp->role.direct)
BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
else
if (!sp->role.direct) {
sp->gfns[index] = gfn;
return;
}
if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
pr_err_ratelimited("gfn mismatch under direct page %llx "
"(expected %llx, got %llx)\n",
sp->gfn,
kvm_mmu_page_get_gfn(sp, index), gfn);
}
/*
@ -1089,6 +1127,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
if (sp->lpage_disallowed)
return;
++kvm->stat.nx_lpage_splits;
list_add_tail(&sp->lpage_disallowed_link,
&kvm->arch.lpage_disallowed_mmu_pages);
sp->lpage_disallowed = true;
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
@ -1106,6 +1155,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
--kvm->stat.nx_lpage_splits;
sp->lpage_disallowed = false;
list_del(&sp->lpage_disallowed_link);
}
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@ -2658,6 +2714,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
kvm_reload_remote_mmus(kvm);
}
if (sp->lpage_disallowed)
unaccount_huge_nx_page(kvm, sp);
sp->role.invalid = 1;
return ret;
}
@ -2866,6 +2925,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (!speculative)
spte |= spte_shadow_accessed_mask(spte);
if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
is_nx_huge_page_enabled()) {
pte_access &= ~ACC_EXEC_MASK;
}
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
else
@ -2986,10 +3050,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
ret = RET_PF_EMULATE;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
is_large_pte(*sptep)? "2MB" : "4kB",
*sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
*sptep, sptep);
trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
++vcpu->kvm->stat.lpages;
@ -3001,8 +3062,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
kvm_release_pfn_clean(pfn);
return ret;
}
@ -3037,9 +3096,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (ret <= 0)
return -1;
for (i = 0; i < ret; i++, gfn++, start++)
for (i = 0; i < ret; i++, gfn++, start++) {
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
put_page(pages[i]);
}
return 0;
}
@ -3087,40 +3148,71 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
{
struct kvm_shadow_walk_iterator iterator;
int level = *levelp;
u64 spte = *it.sptep;
if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
is_nx_huge_page_enabled() &&
is_shadow_present_pte(spte) &&
!is_large_pte(spte)) {
/*
* A small SPTE exists for this pfn, but FNAME(fetch)
* and __direct_map would like to create a large PTE
* instead: just force them to go down another level,
* patching back for them into pfn the next 9 bits of
* the address.
*/
u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
*pfnp |= gfn & page_mask;
(*levelp)--;
}
}
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
int map_writable, int level, kvm_pfn_t pfn,
bool prefault, bool lpage_disallowed)
{
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
int emulate = 0;
gfn_t pseudo_gfn;
int ret;
gfn_t gfn = gpa >> PAGE_SHIFT;
gfn_t base_gfn = gfn;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return 0;
return RET_PF_RETRY;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
write, level, gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
/*
* We cannot overwrite existing page tables with an NX
* large page, as the leaf could be executable.
*/
disallowed_hugepage_adjust(it, gfn, &pfn, &level);
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == level)
break;
}
drop_large_spte(vcpu, iterator.sptep);
if (!is_shadow_present_pte(*iterator.sptep)) {
u64 base_addr = iterator.addr;
drop_large_spte(vcpu, it.sptep);
if (!is_shadow_present_pte(*it.sptep)) {
sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
it.level - 1, true, ACC_ALL);
base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
pseudo_gfn = base_addr >> PAGE_SHIFT;
sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
iterator.level - 1, 1, ACC_ALL);
link_shadow_page(vcpu, iterator.sptep, sp);
link_shadow_page(vcpu, it.sptep, sp);
if (lpage_disallowed)
account_huge_nx_page(vcpu->kvm, sp);
}
}
return emulate;
ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
write, level, base_gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
}
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@ -3156,11 +3248,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
gfn_t *gfnp, kvm_pfn_t *pfnp,
gfn_t gfn, kvm_pfn_t *pfnp,
int *levelp)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *gfnp;
int level = *levelp;
/*
@ -3187,8 +3278,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
mask = KVM_PAGES_PER_HPAGE(level) - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
gfn &= ~mask;
*gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
@ -3415,11 +3504,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
{
int r;
int level;
bool force_pt_level = false;
bool force_pt_level;
kvm_pfn_t pfn;
unsigned long mmu_seq;
bool map_writable, write = error_code & PFERR_WRITE_MASK;
bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
is_nx_huge_page_enabled();
force_pt_level = lpage_disallowed;
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
/*
@ -3445,22 +3537,20 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, pfn,
prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@ -4050,6 +4140,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
is_nx_huge_page_enabled();
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@ -4060,8 +4152,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (r)
return r;
force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
PT_DIRECTORY_LEVEL);
force_pt_level =
lpage_disallowed ||
!check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
if (level > PT_DIRECTORY_LEVEL &&
@ -4082,22 +4175,20 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
prefault, lpage_disallowed);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@ -5819,7 +5910,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@ -5869,7 +5960,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
break;
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return freed;
}
@ -5891,10 +5982,60 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
static bool get_nx_auto_mode(void)
{
/* Return true when CPU has the bug, and mitigations are ON */
return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
}
static void __set_nx_huge_pages(bool val)
{
nx_huge_pages = itlb_multihit_kvm_mitigation = val;
}
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
{
bool old_val = nx_huge_pages;
bool new_val;
/* In "auto" mode deploy workaround only if CPU has the bug. */
if (sysfs_streq(val, "off"))
new_val = 0;
else if (sysfs_streq(val, "force"))
new_val = 1;
else if (sysfs_streq(val, "auto"))
new_val = get_nx_auto_mode();
else if (strtobool(val, &new_val) < 0)
return -EINVAL;
__set_nx_huge_pages(new_val);
if (new_val != old_val) {
struct kvm *kvm;
int idx;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
idx = srcu_read_lock(&kvm->srcu);
kvm_mmu_invalidate_zap_all_pages(kvm);
srcu_read_unlock(&kvm->srcu, idx);
wake_up_process(kvm->arch.nx_lpage_recovery_thread);
}
mutex_unlock(&kvm_lock);
}
return 0;
}
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
if (nx_huge_pages == -1)
__set_nx_huge_pages(get_nx_auto_mode());
kvm_mmu_reset_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
@ -5961,3 +6102,116 @@ void kvm_mmu_module_exit(void)
unregister_shrinker(&mmu_shrinker);
mmu_audit_disable();
}
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
{
unsigned int old_val;
int err;
old_val = nx_huge_pages_recovery_ratio;
err = param_set_uint(val, kp);
if (err)
return err;
if (READ_ONCE(nx_huge_pages) &&
!old_val && nx_huge_pages_recovery_ratio) {
struct kvm *kvm;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
wake_up_process(kvm->arch.nx_lpage_recovery_thread);
mutex_unlock(&kvm_lock);
}
return err;
}
static void kvm_recover_nx_lpages(struct kvm *kvm)
{
int rcu_idx;
struct kvm_mmu_page *sp;
unsigned int ratio;
LIST_HEAD(invalid_list);
ulong to_zap;
rcu_idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
/*
* We use a separate list instead of just using active_mmu_pages
* because the number of lpage_disallowed pages is expected to
* be relatively small compared to the total.
*/
sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
struct kvm_mmu_page,
lpage_disallowed_link);
WARN_ON_ONCE(!sp->lpage_disallowed);
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
WARN_ON_ONCE(sp->lpage_disallowed);
if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
kvm_mmu_commit_zap_page(kvm, &invalid_list);
if (to_zap)
cond_resched_lock(&kvm->mmu_lock);
}
}
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
static long get_nx_lpage_recovery_timeout(u64 start_time)
{
return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
? start_time + 60 * HZ - get_jiffies_64()
: MAX_SCHEDULE_TIMEOUT;
}
static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
{
u64 start_time;
long remaining_time;
while (true) {
start_time = get_jiffies_64();
remaining_time = get_nx_lpage_recovery_timeout(start_time);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && remaining_time > 0) {
schedule_timeout(remaining_time);
remaining_time = get_nx_lpage_recovery_timeout(start_time);
set_current_state(TASK_INTERRUPTIBLE);
}
set_current_state(TASK_RUNNING);
if (kthread_should_stop())
return 0;
kvm_recover_nx_lpages(kvm);
}
}
int kvm_mmu_post_init_vm(struct kvm *kvm)
{
int err;
err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
"kvm-nx-lpage-recovery",
&kvm->arch.nx_lpage_recovery_thread);
if (!err)
kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
return err;
}
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
{
if (kvm->arch.nx_lpage_recovery_thread)
kthread_stop(kvm->arch.nx_lpage_recovery_thread);
}

View file

@ -216,4 +216,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
#endif

View file

@ -325,6 +325,65 @@ TRACE_EVENT(
__entry->kvm_gen == __entry->spte_gen
)
);
TRACE_EVENT(
kvm_mmu_set_spte,
TP_PROTO(int level, gfn_t gfn, u64 *sptep),
TP_ARGS(level, gfn, sptep),
TP_STRUCT__entry(
__field(u64, gfn)
__field(u64, spte)
__field(u64, sptep)
__field(u8, level)
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
__field(u8, u)
),
TP_fast_assign(
__entry->gfn = gfn;
__entry->spte = *sptep;
__entry->sptep = virt_to_phys(sptep);
__entry->level = level;
__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
__entry->x = is_executable_pte(__entry->spte);
__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
),
TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
__entry->gfn, __entry->spte,
__entry->r ? "r" : "-",
__entry->spte & PT_WRITABLE_MASK ? "w" : "-",
__entry->x ? "x" : "-",
__entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
__entry->level, __entry->sptep
)
);
TRACE_EVENT(
kvm_mmu_spte_requested,
TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
TP_ARGS(addr, level, pfn),
TP_STRUCT__entry(
__field(u64, gfn)
__field(u64, pfn)
__field(u8, level)
),
TP_fast_assign(
__entry->gfn = addr >> PAGE_SHIFT;
__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
__entry->level = level;
),
TP_printk("gfn %llx pfn %llx level %d",
__entry->gfn, __entry->pfn, __entry->level
)
);
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH

View file

@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
kvm_release_pfn_clean(pfn);
return true;
}
@ -595,12 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
kvm_pfn_t pfn, bool map_writable, bool prefault)
kvm_pfn_t pfn, bool map_writable, bool prefault,
bool lpage_disallowed)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
gfn_t gfn, base_gfn;
direct_access = gw->pte_access;
@ -645,35 +648,48 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
for (;
shadow_walk_okay(&it) && it.level > hlevel;
shadow_walk_next(&it)) {
gfn_t direct_gfn;
/*
* FNAME(page_fault) might have clobbered the bottom bits of
* gw->gfn, restore them from the virtual address.
*/
gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
base_gfn = gfn;
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
/*
* We cannot overwrite existing page tables with an NX
* large page, as the leaf could be executable.
*/
disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == hlevel)
break;
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
if (is_shadow_present_pte(*it.sptep))
continue;
direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
if (!is_shadow_present_pte(*it.sptep)) {
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
if (lpage_disallowed)
account_huge_nx_page(vcpu->kvm, sp);
}
}
clear_sp_write_flooding_count(it.sptep);
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
it.level, gw->gfn, pfn, prefault, map_writable);
it.level, base_gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
out_gpte_changed:
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
}
@ -740,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int r;
kvm_pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
bool force_pt_level = false;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
is_nx_huge_page_enabled();
bool force_pt_level = lpage_disallowed;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@ -821,6 +839,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@ -829,19 +848,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
level, pfn, map_writable, prefault, lpage_disallowed);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)

View file

@ -736,8 +736,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
if (!npt_enabled) {
/* Shadow paging assumes NX to be available. */
efer |= EFER_NX;
if (!(efer & EFER_LMA))
efer &= ~EFER_LME;
}
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);

View file

@ -2785,17 +2785,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
u64 guest_efer = vmx->vcpu.arch.efer;
u64 ignore_bits = 0;
if (!enable_ept) {
/*
* NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
* host CPUID is more efficient than testing guest CPUID
* or CR4. Host SMEP is anyway a requirement for guest SMEP.
*/
if (boot_cpu_has(X86_FEATURE_SMEP))
guest_efer |= EFER_NX;
else if (!(guest_efer & EFER_NX))
ignore_bits |= EFER_NX;
}
/* Shadow paging assumes NX to be available. */
if (!enable_ept)
guest_efer |= EFER_NX;
/*
* LMA and LME handled by hardware; SCE meaningless outside long mode.

View file

@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
@ -205,7 +205,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ "largepages", VM_STAT(lpages, .mode = 0444) },
{ "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
{ "max_mmu_page_hash_collisions",
VM_STAT(max_mmu_page_hash_collisions) },
{ NULL }
@ -1130,6 +1131,14 @@ u64 kvm_get_arch_capabilities(void)
rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
* the nested hypervisor runs with NX huge pages. If it is not,
* L1 is anyway vulnerable to ITLB_MULTIHIT explots from other
* L1 guests, so it need not worry about its own (L2) guests.
*/
data |= ARCH_CAP_PSCHANGE_MC_NO;
/*
* If we're doing cache flushes (either "always" or "cond")
* we will do one whenever the guest does a vmlaunch/vmresume.
@ -1142,8 +1151,35 @@ u64 kvm_get_arch_capabilities(void)
if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
data |= ARCH_CAP_RDCL_NO;
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
data |= ARCH_CAP_SSB_NO;
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
/*
* On TAA affected systems, export MDS_NO=0 when:
* - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
* - Updated microcode is present. This is detected by
* the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
* that VERW clears CPU buffers.
*
* When MDS_NO=0 is exported, guests deploy clear CPU buffer
* mitigation and don't complain:
*
* "Vulnerable: Clear CPU buffers attempted, no microcode"
*
* If TSX is disabled on the system, guests are also mitigated against
* TAA and clear CPU buffer mitigation is not required for guests.
*/
if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
(data & ARCH_CAP_TSX_CTRL_MSR))
data &= ~ARCH_CAP_MDS_NO;
return data;
}
EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
@ -6502,7 +6538,7 @@ static void kvm_hyperv_tsc_notifier(void)
struct kvm_vcpu *vcpu;
int cpu;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_make_mclock_inprogress_request(kvm);
@ -6528,7 +6564,7 @@ static void kvm_hyperv_tsc_notifier(void)
spin_unlock(&ka->pvclock_gtod_sync_lock);
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
}
#endif
@ -6586,17 +6622,17 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != smp_processor_id())
if (vcpu->cpu != raw_smp_processor_id())
send_ipi = 1;
}
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@ -6722,12 +6758,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@ -8949,6 +8985,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@ -8980,6 +9017,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
}
int kvm_arch_post_init_vm(struct kvm *kvm)
{
return kvm_mmu_post_init_vm(kvm);
}
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
@ -9081,6 +9123,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
EXPORT_SYMBOL_GPL(x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
kvm_mmu_pre_destroy_vm(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {

View file

@ -893,9 +893,6 @@ static void __init kexec_enter_virtual_mode(void)
if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
runtime_code_page_mkexec();
/* clean DUMMY object */
efi_delete_dummy_variable();
#endif
}

View file

@ -266,19 +266,41 @@ void xen_reboot(int reason)
BUG();
}
static int reboot_reason = SHUTDOWN_reboot;
static bool xen_legacy_crash;
void xen_emergency_restart(void)
{
xen_reboot(SHUTDOWN_reboot);
xen_reboot(reboot_reason);
}
static int
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
if (!kexec_crash_loaded())
xen_reboot(SHUTDOWN_crash);
if (!kexec_crash_loaded()) {
if (xen_legacy_crash)
xen_reboot(SHUTDOWN_crash);
reboot_reason = SHUTDOWN_crash;
/*
* If panic_timeout==0 then we are supposed to wait forever.
* However, to preserve original dom0 behavior we have to drop
* into hypervisor. (domU behavior is controlled by its
* config file)
*/
if (panic_timeout == 0)
panic_timeout = -1;
}
return NOTIFY_DONE;
}
static int __init parse_xen_legacy_crash(char *arg)
{
xen_legacy_crash = true;
return 0;
}
early_param("xen_legacy_crash", parse_xen_legacy_crash);
static struct notifier_block xen_panic_block = {
.notifier_call = xen_panic_event,
.priority = INT_MIN

View file

@ -955,9 +955,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
int i;
bool has_stats = false;
spin_lock_irq(blkg->q->queue_lock);
if (!blkg->online)
goto skip;
dname = blkg_dev_name(blkg);
if (!dname)
continue;
goto skip;
/*
* Hooray string manipulation, count is the size written NOT
@ -967,8 +972,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
*/
off += scnprintf(buf+off, size-off, "%s ", dname);
spin_lock_irq(blkg->q->queue_lock);
rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
offsetof(struct blkcg_gq, stat_bytes));
rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
@ -981,8 +984,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
spin_unlock_irq(blkg->q->queue_lock);
if (rbytes || wbytes || rios || wios) {
has_stats = true;
off += scnprintf(buf+off, size-off,
@ -1023,6 +1024,8 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
seq_commit(sf, -1);
}
}
skip:
spin_unlock_irq(blkg->q->queue_lock);
}
rcu_read_unlock();

View file

@ -643,12 +643,27 @@ ssize_t __weak cpu_show_mds(struct device *dev,
return sprintf(buf, "Not affected\n");
}
ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "Not affected\n");
}
ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "Not affected\n");
}
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@ -657,6 +672,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_spec_store_bypass.attr,
&dev_attr_l1tf.attr,
&dev_attr_mds.attr,
&dev_attr_tsx_async_abort.attr,
&dev_attr_itlb_multihit.attr,
NULL
};

View file

@ -228,8 +228,8 @@ static void nbd_put(struct nbd_device *nbd)
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
mutex_unlock(&nbd_index_mutex);
nbd_dev_remove(nbd);
mutex_unlock(&nbd_index_mutex);
}
}
@ -349,17 +349,16 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
goto done;
}
config = nbd->config;
if (!mutex_trylock(&cmd->lock)) {
nbd_config_put(nbd);
return BLK_EH_RESET_TIMER;
}
if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
@ -664,6 +663,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
if (cmd->status != BLK_STS_OK) {
dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
req);
ret = -ENOENT;
goto out;
}
if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
req);
@ -745,7 +750,10 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
blk_mq_complete_request(req);
}
@ -924,6 +932,25 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
int *err)
{
struct socket *sock;
*err = 0;
sock = sockfd_lookup(fd, err);
if (!sock)
return NULL;
if (sock->ops->shutdown == sock_no_shutdown) {
dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
*err = -EINVAL;
return NULL;
}
return sock;
}
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
bool netlink)
{
@ -933,7 +960,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct nbd_sock *nsock;
int err;
sock = sockfd_lookup(arg, &err);
sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
@ -985,7 +1012,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
int i;
int err;
sock = sockfd_lookup(arg, &err);
sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;

View file

@ -424,13 +424,14 @@ static void reset_bdev(struct zram *zram)
static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct file *file;
struct zram *zram = dev_to_zram(dev);
struct file *file = zram->backing_dev;
char *p;
ssize_t ret;
down_read(&zram->init_lock);
if (!zram->backing_dev) {
file = zram->backing_dev;
if (!file) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;

View file

@ -73,31 +73,39 @@ static void __init clk_boston_setup(struct device_node *np)
hw = clk_hw_register_fixed_rate(NULL, "input", NULL, 0, in_freq);
if (IS_ERR(hw)) {
pr_err("failed to register input clock: %ld\n", PTR_ERR(hw));
goto error;
goto fail_input;
}
onecell->hws[BOSTON_CLK_INPUT] = hw;
hw = clk_hw_register_fixed_rate(NULL, "sys", "input", 0, sys_freq);
if (IS_ERR(hw)) {
pr_err("failed to register sys clock: %ld\n", PTR_ERR(hw));
goto error;
goto fail_sys;
}
onecell->hws[BOSTON_CLK_SYS] = hw;
hw = clk_hw_register_fixed_rate(NULL, "cpu", "input", 0, cpu_freq);
if (IS_ERR(hw)) {
pr_err("failed to register cpu clock: %ld\n", PTR_ERR(hw));
goto error;
goto fail_cpu;
}
onecell->hws[BOSTON_CLK_CPU] = hw;
err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, onecell);
if (err)
if (err) {
pr_err("failed to add DT provider: %d\n", err);
goto fail_clk_add;
}
return;
error:
fail_clk_add:
clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_CPU]);
fail_cpu:
clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_SYS]);
fail_sys:
clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_INPUT]);
fail_input:
kfree(onecell);
}

View file

@ -1276,7 +1276,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
tp->write_seq = snd_isn;
tp->snd_nxt = snd_isn;
tp->snd_una = snd_isn;
inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
inet_sk(sk)->inet_id = prandom_u32();
assign_rxopt(sk, opt);
if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))

View file

@ -1716,7 +1716,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return peekmsg(sk, msg, len, nonblock, flags);
if (sk_can_busy_loop(sk) &&
skb_queue_empty(&sk->sk_receive_queue) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
sk_busy_loop(sk, nonblock);

View file

@ -703,6 +703,25 @@ static int bam_dma_terminate_all(struct dma_chan *chan)
/* remove all transactions, including active transaction */
spin_lock_irqsave(&bchan->vc.lock, flag);
/*
* If we have transactions queued, then some might be committed to the
* hardware in the desc fifo. The only way to reset the desc fifo is
* to do a hardware reset (either by pipe or the entire block).
* bam_chan_init_hw() will trigger a pipe reset, and also reinit the
* pipe. If the pipe is left disabled (default state after pipe reset)
* and is accessed by a connected hardware engine, a fatal error in
* the BAM will occur. There is a small window where this could happen
* with bam_chan_init_hw(), but it is assumed that the caller has
* stopped activity on any attached hardware engine. Make sure to do
* this first so that the BAM hardware doesn't cause memory corruption
* by accessing freed resources.
*/
if (!list_empty(&bchan->desc_list)) {
async_desc = list_first_entry(&bchan->desc_list,
struct bam_async_desc, desc_node);
bam_chan_init_hw(bchan, async_desc->dir);
}
list_for_each_entry_safe(async_desc, tmp,
&bchan->desc_list, desc_node) {
list_add(&async_desc->vd.node, &bchan->vc.desc_issued);

View file

@ -181,6 +181,7 @@ struct sprd_dma_dev {
struct sprd_dma_chn channels[0];
};
static void sprd_dma_free_desc(struct virt_dma_desc *vd);
static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param);
static struct of_dma_filter_info sprd_dma_info = {
.filter_fn = sprd_dma_filter_fn,
@ -493,12 +494,19 @@ static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
static void sprd_dma_free_chan_resources(struct dma_chan *chan)
{
struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
struct virt_dma_desc *cur_vd = NULL;
unsigned long flags;
spin_lock_irqsave(&schan->vc.lock, flags);
if (schan->cur_desc)
cur_vd = &schan->cur_desc->vd;
sprd_dma_stop(schan);
spin_unlock_irqrestore(&schan->vc.lock, flags);
if (cur_vd)
sprd_dma_free_desc(cur_vd);
vchan_free_chan_resources(&schan->vc);
pm_runtime_put(chan->device->dev);
}
@ -814,15 +822,22 @@ static int sprd_dma_resume(struct dma_chan *chan)
static int sprd_dma_terminate_all(struct dma_chan *chan)
{
struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
struct virt_dma_desc *cur_vd = NULL;
unsigned long flags;
LIST_HEAD(head);
spin_lock_irqsave(&schan->vc.lock, flags);
if (schan->cur_desc)
cur_vd = &schan->cur_desc->vd;
sprd_dma_stop(schan);
vchan_get_all_descriptors(&schan->vc, &head);
spin_unlock_irqrestore(&schan->vc.lock, flags);
if (cur_vd)
sprd_dma_free_desc(cur_vd);
vchan_dma_desc_free_list(&schan->vc, &head);
return 0;
}

View file

@ -585,9 +585,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
struct cppi41_channel *c = to_cpp41_chan(chan);
struct dma_async_tx_descriptor *txd = NULL;
struct cppi41_dd *cdd = c->cdd;
struct cppi41_desc *d;
struct scatterlist *sg;
unsigned int i;
int error;
error = pm_runtime_get(cdd->ddev.dev);
if (error < 0) {
pm_runtime_put_noidle(cdd->ddev.dev);
return NULL;
}
if (cdd->is_suspended)
goto err_out_not_ready;
d = c->desc;
for_each_sg(sgl, sg, sg_len, i) {
@ -610,7 +623,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
d++;
}
return &c->txd;
txd = &c->txd;
err_out_not_ready:
pm_runtime_mark_last_busy(cdd->ddev.dev);
pm_runtime_put_autosuspend(cdd->ddev.dev);
return txd;
}
static void cppi41_compute_td_desc(struct cppi41_desc *d)

View file

@ -72,6 +72,9 @@
#define XILINX_DMA_DMACR_CIRC_EN BIT(1)
#define XILINX_DMA_DMACR_RUNSTOP BIT(0)
#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5)
#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24)
#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16)
#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8)
#define XILINX_DMA_REG_DMASR 0x0004
#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15)
@ -2112,8 +2115,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
chan->config.gen_lock = cfg->gen_lock;
chan->config.master = cfg->master;
dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN;
if (cfg->gen_lock && chan->genlock) {
dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK;
dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
}
@ -2129,11 +2134,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
chan->config.delay = cfg->delay;
if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK;
dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
chan->config.coalesc = cfg->coalesc;
}
if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK;
dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
chan->config.delay = cfg->delay;
}

View file

@ -393,7 +393,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
pcie->device_id.vendor_id, pcie->device_id.device_id);
p = pcie->device_id.class_code;
printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
}
if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,

View file

@ -163,13 +163,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio,
case 0:
val = MAX77620_CNFG_GPIO_DBNC_None;
break;
case 1 ... 8:
case 1000 ... 8000:
val = MAX77620_CNFG_GPIO_DBNC_8ms;
break;
case 9 ... 16:
case 9000 ... 16000:
val = MAX77620_CNFG_GPIO_DBNC_16ms;
break;
case 17 ... 32:
case 17000 ... 32000:
val = MAX77620_CNFG_GPIO_DBNC_32ms;
break;
default:

View file

@ -264,7 +264,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
goto error_free;
return r;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
@ -277,8 +277,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
amdgpu_bo_list_put(list);
return r;
goto error_put_list;
}
handle = r;
@ -300,9 +299,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
amdgpu_bo_list_put(list);
r = PTR_ERR(old);
goto error_free;
goto error_put_list;
}
amdgpu_bo_list_put(old);
@ -319,8 +317,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
error_put_list:
amdgpu_bo_list_put(list);
error_free:
if (info)
kvfree(info);
kvfree(info);
return r;
}

View file

@ -203,7 +203,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r;
int r = 0;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@ -228,6 +228,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
fence = r ? ERR_PTR(r) : fence;
return fence;
}

View file

@ -428,7 +428,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
.interruptible = (bp->type != ttm_bo_type_kernel),
.no_wait_gpu = false,
.resv = bp->resv,
.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
.flags = bp->type != ttm_bo_type_kernel ?
TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
};
struct amdgpu_bo *bo;
unsigned long page_align, size = bp->size;

View file

@ -433,6 +433,7 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
enum display_dongle_type *dongle = &sink_cap->dongle_type;
uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE];
bool is_type2_dongle = false;
int retry_count = 2;
struct dp_hdmi_dongle_signature_data *dongle_signature;
/* Assume we have no valid DP passive dongle connected */
@ -445,13 +446,24 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
DP_HDMI_DONGLE_ADDRESS,
type2_dongle_buf,
sizeof(type2_dongle_buf))) {
*dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
/* Passive HDMI dongles can sometimes fail here without retrying*/
while (retry_count > 0) {
if (i2c_read(ddc,
DP_HDMI_DONGLE_ADDRESS,
type2_dongle_buf,
sizeof(type2_dongle_buf)))
break;
retry_count--;
}
if (retry_count == 0) {
*dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
"DP-DVI passive dongle %dMhz: ",
DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
return;
CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
"DP-DVI passive dongle %dMhz: ",
DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
return;
}
}
/* Check if Type 2 dongle.*/

View file

@ -1399,9 +1399,9 @@ bool dc_remove_plane_from_context(
* For head pipe detach surfaces from pipe for tail
* pipe just zero it out
*/
if (!pipe_ctx->top_pipe ||
(!pipe_ctx->top_pipe->top_pipe &&
if (!pipe_ctx->top_pipe || (!pipe_ctx->top_pipe->top_pipe &&
pipe_ctx->top_pipe->stream_res.opp != pipe_ctx->stream_res.opp)) {
pipe_ctx->top_pipe = NULL;
pipe_ctx->plane_state = NULL;
pipe_ctx->bottom_pipe = NULL;
} else {
@ -1803,8 +1803,6 @@ enum dc_status dc_remove_stream_from_ctx(
dc->res_pool->funcs->remove_stream_from_ctx(dc, new_ctx, stream);
memset(del_pipe, 0, sizeof(*del_pipe));
break;
}
}

View file

@ -4751,9 +4751,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
for (i = 0; i < podn_vdd_dep->count - 1; i++)
od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc)
for (i = 0; i < podn_vdd_dep->count; i++)
od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;

View file

@ -51,13 +51,11 @@
* granting userspace undue privileges. There are three categories of privilege.
*
* First, commands which are explicitly defined as privileged or which should
* only be used by the kernel driver. The parser generally rejects such
* commands, though it may allow some from the drm master process.
* only be used by the kernel driver. The parser rejects such commands
*
* Second, commands which access registers. To support correct/enhanced
* userspace functionality, particularly certain OpenGL extensions, the parser
* provides a whitelist of registers which userspace may safely access (for both
* normal and drm master processes).
* provides a whitelist of registers which userspace may safely access
*
* Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
* The parser always rejects such commands.
@ -82,9 +80,9 @@
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
* mentioned above: rejected, master-only, register whitelist. The parser
* implements a number of checks, including the privileged memory checks, via a
* general bitmasking mechanism.
* mentioned above: rejected, register whitelist. The parser implements a number
* of checks, including the privileged memory checks, via a general bitmasking
* mechanism.
*/
/*
@ -102,8 +100,6 @@ struct drm_i915_cmd_descriptor {
* CMD_DESC_REJECT: The command is never allowed
* CMD_DESC_REGISTER: The command should be checked against the
* register whitelist for the appropriate ring
* CMD_DESC_MASTER: The command is allowed if the submitting process
* is the DRM master
*/
u32 flags;
#define CMD_DESC_FIXED (1<<0)
@ -111,7 +107,6 @@ struct drm_i915_cmd_descriptor {
#define CMD_DESC_REJECT (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK (1<<4)
#define CMD_DESC_MASTER (1<<5)
/*
* The command's unique identification bits and the bitmask to get them.
@ -192,7 +187,7 @@ struct drm_i915_cmd_table {
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
.cmd = { (op), ~0u << (opm) }, \
.cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
@ -207,14 +202,13 @@ struct drm_i915_cmd_table {
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
#define M CMD_DESC_MASTER
/* Command Mask Fixed Len Action
---------------------------------------------------------- */
static const struct drm_i915_cmd_descriptor common_cmds[] = {
static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, R ),
CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ),
CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
@ -244,7 +238,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
static const struct drm_i915_cmd_descriptor render_cmds[] = {
static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_PREDICATE, SMI, F, 1, S ),
@ -311,7 +305,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_RS_CONTEXT, SMI, F, 1, S ),
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
@ -328,7 +322,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ),
};
static const struct drm_i915_cmd_descriptor video_cmds[] = {
static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@ -372,7 +366,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD( MFX_WAIT, SMFX, F, 1, S ),
};
static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@ -410,7 +404,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
}}, ),
};
static const struct drm_i915_cmd_descriptor blt_cmds[] = {
static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B,
.bits = {{
@ -444,10 +438,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
};
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
/*
* For Gen9 we can still rely on the h/w to enforce cmd security, and only
* need to re-enforce the register access checks. We therefore only need to
* teach the cmdparser how to find the end of each command, and identify
* register accesses. The table doesn't need to reject any commands, and so
* the only commands listed here are:
* 1) Those that touch registers
* 2) Those that do not have the default 8-bit length
*
* Note that the default MI length mask chosen for this table is 0xFF, not
* the 0x3F used on older devices. This is because the vast majority of MI
* cmds on Gen9 use a standard 8-bit Length field.
* All the Gen9 blitter instructions are standard 0xFF length mask, and
* none allow access to non-general registers, so in fact no BLT cmds are
* included in the table at all.
*
*/
static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, S ),
CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ),
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ),
CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ),
CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W,
.reg = { .offset = 1, .mask = 0x007FFFFC } ),
CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ),
CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W,
.reg = { .offset = 1, .mask = 0x007FFFFC } ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
/*
* We allow BB_START but apply further checks. We just sanitize the
* basic fields here.
*/
#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0)
#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B,
.bits = {{
.offset = 0,
.mask = MI_BB_START_OPERAND_MASK,
.expected = MI_BB_START_OPERAND_EXPECT,
}}, ),
};
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
@ -461,40 +509,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
#undef R
#undef W
#undef B
#undef M
static const struct drm_i915_cmd_table gen7_render_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ render_cmds, ARRAY_SIZE(render_cmds) },
static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};
static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ render_cmds, ARRAY_SIZE(render_cmds) },
static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};
static const struct drm_i915_cmd_table gen7_video_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ video_cmds, ARRAY_SIZE(video_cmds) },
static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};
static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ vecs_cmds, ARRAY_SIZE(vecs_cmds) },
static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};
static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ blt_cmds, ARRAY_SIZE(blt_cmds) },
static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};
static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
{ common_cmds, ARRAY_SIZE(common_cmds) },
{ blt_cmds, ARRAY_SIZE(blt_cmds) },
static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
{ gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
};
/*
* Register whitelists, sorted by increasing register offset.
*/
@ -610,17 +662,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
REG32(FORCEWAKE_MT),
REG32(DERRMR),
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
};
static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
REG32(FORCEWAKE_MT),
REG32(DERRMR),
static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG32(BCS_SWCTRL),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
REG64_IDX(BCS_GPR, 0),
REG64_IDX(BCS_GPR, 1),
REG64_IDX(BCS_GPR, 2),
REG64_IDX(BCS_GPR, 3),
REG64_IDX(BCS_GPR, 4),
REG64_IDX(BCS_GPR, 5),
REG64_IDX(BCS_GPR, 6),
REG64_IDX(BCS_GPR, 7),
REG64_IDX(BCS_GPR, 8),
REG64_IDX(BCS_GPR, 9),
REG64_IDX(BCS_GPR, 10),
REG64_IDX(BCS_GPR, 11),
REG64_IDX(BCS_GPR, 12),
REG64_IDX(BCS_GPR, 13),
REG64_IDX(BCS_GPR, 14),
REG64_IDX(BCS_GPR, 15),
};
#undef REG64
@ -629,28 +691,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
struct drm_i915_reg_table {
const struct drm_i915_reg_descriptor *regs;
int num_regs;
bool master;
};
static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
{ ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};
static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
{ ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
{ hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
{ hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
{ gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
{ hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};
static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
{ hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
{ gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
{ gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@ -708,6 +769,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
{
u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
return 0xFF;
DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
return 0;
}
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
@ -865,18 +937,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
int cmd_table_count;
int ret;
if (!IS_GEN7(engine->i915))
if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) &&
engine->id == BCS))
return;
switch (engine->id) {
case RCS:
if (IS_HASWELL(engine->i915)) {
cmd_tables = hsw_render_ring_cmds;
cmd_tables = hsw_render_ring_cmd_table;
cmd_table_count =
ARRAY_SIZE(hsw_render_ring_cmds);
ARRAY_SIZE(hsw_render_ring_cmd_table);
} else {
cmd_tables = gen7_render_cmds;
cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
cmd_tables = gen7_render_cmd_table;
cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
@ -886,36 +959,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->reg_tables = ivb_render_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
}
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VCS:
cmd_tables = gen7_video_cmds;
cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
cmd_tables = gen7_video_cmd_table;
cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case BCS:
if (IS_HASWELL(engine->i915)) {
cmd_tables = hsw_blt_ring_cmds;
cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
if (IS_GEN9(engine->i915)) {
cmd_tables = gen9_blt_cmd_table;
cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
engine->get_cmd_length_mask =
gen9_blt_get_cmd_length_mask;
/* BCS Engine unsafe without parser */
engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
} else if (IS_HASWELL(engine->i915)) {
cmd_tables = hsw_blt_ring_cmd_table;
cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
} else {
cmd_tables = gen7_blt_cmds;
cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
cmd_tables = gen7_blt_cmd_table;
cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
if (IS_GEN9(engine->i915)) {
engine->reg_tables = gen9_blt_reg_tables;
engine->reg_table_count =
ARRAY_SIZE(gen9_blt_reg_tables);
} else if (IS_HASWELL(engine->i915)) {
engine->reg_tables = hsw_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
} else {
engine->reg_tables = ivb_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
}
engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VECS:
cmd_tables = hsw_vebox_cmds;
cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
cmd_tables = hsw_vebox_cmd_table;
cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
@ -941,7 +1024,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
return;
}
engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}
/**
@ -953,7 +1036,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
if (!intel_engine_needs_cmd_parser(engine))
if (!intel_engine_using_cmd_parser(engine))
return;
fini_hash_table(engine);
@ -1027,22 +1110,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
}
static const struct drm_i915_reg_descriptor *
find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
find_reg(const struct intel_engine_cs *engine, u32 addr)
{
const struct drm_i915_reg_table *table = engine->reg_tables;
const struct drm_i915_reg_descriptor *reg = NULL;
int count = engine->reg_table_count;
for (; count > 0; ++table, --count) {
if (!table->master || is_master) {
const struct drm_i915_reg_descriptor *reg;
for (; !reg && (count > 0); ++table, --count)
reg = __find_reg(table->regs, table->num_regs, addr);
reg = __find_reg(table->regs, table->num_regs, addr);
if (reg != NULL)
return reg;
}
}
return NULL;
return reg;
}
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@ -1127,8 +1204,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
const u32 *cmd, u32 length,
const bool is_master)
const u32 *cmd, u32 length)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
@ -1138,12 +1214,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
*cmd);
return false;
}
if (desc->flags & CMD_DESC_REGISTER) {
/*
* Get the distance between individual register offset
@ -1157,7 +1227,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
find_reg(engine, is_master, reg_addr);
find_reg(engine, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@ -1235,16 +1305,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
static int check_bbstart(const struct i915_gem_context *ctx,
u32 *cmd, u32 offset, u32 length,
u32 batch_len,
u64 batch_start,
u64 shadow_batch_start)
{
u64 jump_offset, jump_target;
u32 target_cmd_offset, target_cmd_index;
/* For igt compatibility on older platforms */
if (CMDPARSER_USES_GGTT(ctx->i915)) {
DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
return -EACCES;
}
if (length != 3) {
DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
length);
return -EINVAL;
}
jump_target = *(u64*)(cmd+1);
jump_offset = jump_target - batch_start;
/*
* Any underflow of jump_target is guaranteed to be outside the range
* of a u32, so >= test catches both too large and too small
*/
if (jump_offset >= batch_len) {
DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
jump_target);
return -EINVAL;
}
/*
* This cannot overflow a u32 because we already checked jump_offset
* is within the BB, and the batch_len is a u32
*/
target_cmd_offset = lower_32_bits(jump_offset);
target_cmd_index = target_cmd_offset / sizeof(u32);
*(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
if (target_cmd_index == offset)
return 0;
if (ctx->jump_whitelist_cmds <= target_cmd_index) {
DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
return -EINVAL;
} else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
jump_target);
return -EINVAL;
}
return 0;
}
static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
{
const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
const u32 exact_size = BITS_TO_LONGS(batch_cmds);
u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
unsigned long *next_whitelist;
if (CMDPARSER_USES_GGTT(ctx->i915))
return;
if (batch_cmds <= ctx->jump_whitelist_cmds) {
bitmap_zero(ctx->jump_whitelist, batch_cmds);
return;
}
again:
next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
if (next_whitelist) {
kfree(ctx->jump_whitelist);
ctx->jump_whitelist = next_whitelist;
ctx->jump_whitelist_cmds =
next_size * BITS_PER_BYTE * sizeof(long);
return;
}
if (next_size > exact_size) {
next_size = exact_size;
goto again;
}
DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
return;
}
#define LENGTH_BIAS 2
/**
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
* @ctx: the context in which the batch is to execute
* @engine: the engine on which the batch is to execute
* @batch_obj: the batch buffer in question
* @shadow_batch_obj: copy of the batch buffer in question
* @batch_start: Canonical base address of batch
* @batch_start_offset: byte offset in the batch at which execution starts
* @batch_len: length of the commands in batch_obj
* @is_master: is the submitting process the drm master?
* @shadow_batch_obj: copy of the batch buffer in question
* @shadow_batch_start: Canonical base address of shadow_batch_obj
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@ -1252,14 +1418,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
int intel_engine_cmd_parser(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u64 batch_start,
u32 batch_start_offset,
u32 batch_len,
bool is_master)
struct drm_i915_gem_object *shadow_batch_obj,
u64 shadow_batch_start)
{
u32 *cmd, *batch_end;
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
@ -1273,6 +1442,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
init_whitelist(ctx, batch_len);
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
@ -1282,31 +1453,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
do {
u32 length;
if (*cmd == MI_BATCH_BUFFER_END) {
if (needs_clflush_after) {
void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
drm_clflush_virt_range(ptr,
(void *)(cmd + 1) - ptr);
}
if (*cmd == MI_BATCH_BUFFER_END)
break;
}
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
ret = -EINVAL;
break;
}
/*
* If the batch buffer contains a chained batch, return an
* error that tells the caller to abort and dispatch the
* workload as a non-secure batch.
*/
if (desc->cmd.value == MI_BATCH_BUFFER_START) {
ret = -EACCES;
break;
goto err;
}
if (desc->flags & CMD_DESC_FIXED)
@ -1320,22 +1475,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
length,
batch_end - cmd);
ret = -EINVAL;
goto err;
}
if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
goto err;
}
if (desc->cmd.value == MI_BATCH_BUFFER_START) {
ret = check_bbstart(ctx, cmd, offset, length,
batch_len, batch_start,
shadow_batch_start);
if (ret)
goto err;
break;
}
if (!check_cmd(engine, desc, cmd, length, is_master)) {
ret = -EACCES;
break;
}
if (ctx->jump_whitelist_cmds > offset)
set_bit(offset, ctx->jump_whitelist);
cmd += length;
offset += length;
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
break;
goto err;
}
} while (1);
if (needs_clflush_after) {
void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
}
err:
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_engine(engine, dev_priv, id) {
if (intel_engine_needs_cmd_parser(engine)) {
if (intel_engine_using_cmd_parser(engine)) {
active = true;
break;
}
@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
* the parser enabled.
* 9. Don't whitelist or handle oacontrol specially, as ownership
* for oacontrol state is moving to i915-perf.
* 10. Support for Gen9 BCS Parsing
*/
return 9;
return 10;
}

View file

@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = HAS_LEGACY_SEMAPHORES(dev_priv);
break;
case I915_PARAM_HAS_SECURE_BATCHES:
value = capable(CAP_SYS_ADMIN);
value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version(dev_priv);
@ -1627,6 +1627,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
i915_gem_suspend_late(dev_priv);
intel_display_set_init_power(dev_priv, false);
i915_rc6_ctx_wa_suspend(dev_priv);
intel_uncore_suspend(dev_priv);
/*
@ -1853,6 +1854,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
else
intel_display_set_init_power(dev_priv, true);
i915_rc6_ctx_wa_resume(dev_priv);
intel_engines_sanitize(dev_priv);
enable_rpm_wakeref_asserts(dev_priv);

View file

@ -801,6 +801,7 @@ struct intel_rps {
struct intel_rc6 {
bool enabled;
bool ctx_corrupted;
u64 prev_hw_residency[4];
u64 cur_residency[4];
};
@ -2496,6 +2497,12 @@ intel_info(const struct drm_i915_private *dev_priv)
#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv))
#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv))
/*
* The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
* All later gens can run the final buffer from the ppgtt
*/
#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv)
#define ENGINE_MASK(id) BIT(id)
#define RENDER_RING ENGINE_MASK(RCS)
#define BSD_RING ENGINE_MASK(VCS)
@ -2517,6 +2524,8 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv)
#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc)
#define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop)
#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
@ -2549,10 +2558,12 @@ intel_info(const struct drm_i915_private *dev_priv)
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
(IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9)
/* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
(IS_CANNONLAKE(dev_priv) || \
IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
(IS_CANNONLAKE(dev_priv) || INTEL_GEN(dev_priv) == 9)
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@ -2944,6 +2955,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
u64 alignment,
u64 flags);
struct i915_vma * __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags);
int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
@ -3337,12 +3356,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
int intel_engine_cmd_parser(struct i915_gem_context *cxt,
struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u64 user_batch_start,
u32 batch_start_offset,
u32 batch_len,
bool is_master);
struct drm_i915_gem_object *shadow_batch_obj,
u64 shadow_batch_start);
/* i915_perf.c */
extern void i915_perf_init(struct drm_i915_private *dev_priv);

View file

@ -174,6 +174,11 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
if (INTEL_GEN(i915) >= 6)
gen6_rps_idle(i915);
if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
i915_rc6_ctx_wa_check(i915);
intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
}
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
intel_runtime_pm_put(i915);
@ -220,6 +225,9 @@ void i915_gem_unpark(struct drm_i915_private *i915)
*/
intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
i915->gt.awake = true;
if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
i915->gt.epoch = 1;
@ -4425,6 +4433,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_address_space *vm = &dev_priv->ggtt.vm;
return i915_gem_object_pin(obj, vm, view, size, alignment,
flags | PIN_GLOBAL);
}
struct i915_vma *
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
@ -4488,7 +4510,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
ret = i915_vma_pin(vma, size, alignment, flags);
if (ret)
return ERR_PTR(ret);

View file

@ -124,6 +124,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
i915_ppgtt_put(ctx->ppgtt);
kfree(ctx->jump_whitelist);
for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
struct intel_context *ce = &ctx->__engine[n];
@ -339,6 +341,9 @@ __create_hw_context(struct drm_i915_private *dev_priv,
else
ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
ctx->jump_whitelist = NULL;
ctx->jump_whitelist_cmds = 0;
return ctx;
err_pid:

View file

@ -183,6 +183,12 @@ struct i915_gem_context {
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
/** jump_whitelist: Bit array for tracking cmds during cmdparsing */
unsigned long *jump_whitelist;
/** jump_whitelist_cmds: No of cmd slots available */
u32 jump_whitelist_cmds;
/** handles_vma: rbtree to look up our context specific obj/vma for
* the user handle. (user handles are per fd, but the binding is
* per vm, which may be one per context or shared with the global GTT)

View file

@ -309,7 +309,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
return intel_engine_requires_cmd_parser(eb->engine) ||
(intel_engine_using_cmd_parser(eb->engine) &&
eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@ -1893,10 +1895,38 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
static struct i915_vma *
shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = eb->i915;
struct i915_address_space *vm;
u64 flags;
/*
* PPGTT backed shadow buffers must be mapped RO, to prevent
* post-scan tampering
*/
if (CMDPARSER_USES_GGTT(dev_priv)) {
flags = PIN_GLOBAL;
vm = &dev_priv->ggtt.vm;
} else if (eb->vm->has_read_only) {
flags = PIN_USER;
vm = eb->vm;
i915_gem_object_set_readonly(obj);
} else {
DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
return ERR_PTR(-EINVAL);
}
return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
}
static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_gem_object *shadow_batch_obj;
struct i915_vma *vma;
u64 batch_start;
u64 shadow_batch_start;
int err;
shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
@ -1904,29 +1934,54 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
if (IS_ERR(shadow_batch_obj))
return ERR_CAST(shadow_batch_obj);
err = intel_engine_cmd_parser(eb->engine,
vma = shadow_batch_pin(eb, shadow_batch_obj);
if (IS_ERR(vma))
goto out;
batch_start = gen8_canonical_addr(eb->batch->node.start) +
eb->batch_start_offset;
shadow_batch_start = gen8_canonical_addr(vma->node.start);
err = intel_engine_cmd_parser(eb->ctx,
eb->engine,
eb->batch->obj,
shadow_batch_obj,
batch_start,
eb->batch_start_offset,
eb->batch_len,
is_master);
shadow_batch_obj,
shadow_batch_start);
if (err) {
if (err == -EACCES) /* unhandled chained batch */
i915_vma_unpin(vma);
/*
* Unsafe GGTT-backed buffers can still be submitted safely
* as non-secure.
* For PPGTT backing however, we have no choice but to forcibly
* reject unsafe buffers
*/
if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
/* Execute original buffer non-secure */
vma = NULL;
else
vma = ERR_PTR(err);
goto out;
}
vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
goto out;
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
eb->batch_start_offset = 0;
eb->batch = vma;
/* eb->batch_len unchanged */
if (CMDPARSER_USES_GGTT(eb->i915))
eb->batch_flags |= I915_DISPATCH_SECURE;
out:
i915_gem_object_unpin_pages(shadow_batch_obj);
@ -2177,6 +2232,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct sync_file *out_fence = NULL;
@ -2187,7 +2243,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
eb.i915 = to_i915(dev);
eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@ -2209,8 +2265,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
if (INTEL_GEN(i915) >= 11)
return -ENODEV;
/* Return -EPERM to trigger fallback code on old binaries. */
if (!HAS_SECURE_BATCHES(i915))
return -EPERM;
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
return -EPERM;
return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@ -2297,34 +2360,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
if (eb.batch_len == 0)
eb.batch_len = eb.batch->size - eb.batch_start_offset;
if (eb_use_cmdparser(&eb)) {
struct i915_vma *vma;
vma = eb_parse(&eb, drm_is_current_master(file));
vma = eb_parse(&eb);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_vma;
}
if (vma) {
/*
* Batch parsed and accepted:
*
* Set the DISPATCH_SECURE bit to remove the NON_SECURE
* bit from MI_BATCH_BUFFER_START commands issued in
* the dispatch_execbuffer implementations. We
* specifically don't want that set on batches the
* command parser has accepted.
*/
eb.batch_flags |= I915_DISPATCH_SECURE;
eb.batch_start_offset = 0;
eb.batch = vma;
}
}
if (eb.batch_len == 0)
eb.batch_len = eb.batch->size - eb.batch_start_offset;
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.

View file

@ -158,7 +158,8 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
return 0;
if (enable_ppgtt == 1)
/* Full PPGTT is required by the Gen9 cmdparser */
if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9)
return 1;
if (enable_ppgtt == 2 && has_full_ppgtt)

Some files were not shown because too many files have changed in this diff Show more