-rw-r--r--.mailmap7
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt20
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst15
-rw-r--r--Documentation/core-api/protection-keys.rst44
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml123
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml3
-rw-r--r--Documentation/filesystems/overlayfs.rst4
-rw-r--r--Documentation/networking/dsa/dsa.rst363
-rw-r--r--Documentation/networking/ip-sysctl.rst15
-rw-r--r--Documentation/virt/kvm/api.rst2
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile2
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/arm/boot/dts/lan966x.dtsi2
-rw-r--r--arch/arm/include/asm/dma.h2
-rw-r--r--arch/arm/lib/findbit.S16
-rw-r--r--arch/arm/mach-pxa/corgi.c2
-rw-r--r--arch/arm/mach-pxa/hx4700.c2
-rw-r--r--arch/arm/mach-pxa/icontrol.c4
-rw-r--r--arch/arm/mach-pxa/littleton.c2
-rw-r--r--arch/arm/mach-pxa/magician.c2
-rw-r--r--arch/arm/mach-pxa/spitz.c2
-rw-r--r--arch/arm/mach-pxa/z2.c4
-rw-r--r--arch/csky/include/asm/tlb.h15
-rw-r--r--arch/loongarch/Kconfig2
-rw-r--r--arch/loongarch/include/asm/asmmacro.h12
-rw-r--r--arch/loongarch/include/asm/atomic.h37
-rw-r--r--arch/loongarch/include/asm/barrier.h4
-rw-r--r--arch/loongarch/include/asm/cmpxchg.h4
-rw-r--r--arch/loongarch/include/asm/compiler.h15
-rw-r--r--arch/loongarch/include/asm/elf.h2
-rw-r--r--arch/loongarch/include/asm/futex.h11
-rw-r--r--arch/loongarch/include/asm/irqflags.h1
-rw-r--r--arch/loongarch/include/asm/local.h1
-rw-r--r--arch/loongarch/include/asm/loongson.h16
-rw-r--r--arch/loongarch/include/asm/stacktrace.h12
-rw-r--r--arch/loongarch/include/asm/thread_info.h4
-rw-r--r--arch/loongarch/include/asm/tlb.h10
-rw-r--r--arch/loongarch/include/asm/uaccess.h2
-rw-r--r--arch/loongarch/kernel/cacheinfo.c11
-rw-r--r--arch/loongarch/kernel/entry.S4
-rw-r--r--arch/loongarch/kernel/env.c20
-rw-r--r--arch/loongarch/kernel/fpu.S174
-rw-r--r--arch/loongarch/kernel/genex.S12
-rw-r--r--arch/loongarch/kernel/head.S8
-rw-r--r--arch/loongarch/kernel/ptrace.c12
-rw-r--r--arch/loongarch/kernel/reset.c1
-rw-r--r--arch/loongarch/kernel/setup.c2
-rw-r--r--arch/loongarch/kernel/smp.c113
-rw-r--r--arch/loongarch/kernel/switch.S4
-rw-r--r--arch/loongarch/lib/clear_user.S2
-rw-r--r--arch/loongarch/lib/copy_user.S2
-rw-r--r--arch/loongarch/lib/delay.c1
-rw-r--r--arch/loongarch/mm/page.S118
-rw-r--r--arch/loongarch/mm/tlbex.S98
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/include/asm/tlb.h2
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/riscv/Makefile3
-rw-r--r--arch/riscv/boot/dts/canaan/canaan_kd233.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts6
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts2
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/elf_kexec.c2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/archrandom.h9
-rw-r--r--arch/s390/include/asm/tlb.h3
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/include/asm/tlb_64.h2
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/events/intel/lbr.c19
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/fpu/api.h2
-rw-r--r--arch/x86/include/asm/mwait.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/sev.h7
-rw-r--r--arch/x86/include/asm/special_insns.h9
-rw-r--r--arch/x86/include/asm/tlb.h3
-rw-r--r--arch/x86/include/asm/tlbflush.h1
-rw-r--r--arch/x86/kernel/alternative.c4
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c47
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/vmware.c4
-rw-r--r--arch/x86/kernel/fpu/core.c14
-rw-r--r--arch/x86/kernel/pmem.c7
-rw-r--r--arch/x86/kernel/process.c44
-rw-r--r--arch/x86/kernel/sev-shared.c25
-rw-r--r--arch/x86/kernel/sev.c17
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/pkeys.c15
-rw-r--r--arch/x86/mm/tlb.c31
-rw-r--r--certs/Kconfig1
-rw-r--r--drivers/acpi/cppc_acpi.c6
-rw-r--r--drivers/clk/clk-lan966x.c2
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c1
-rw-r--r--drivers/edac/ghes_edac.c11
-rw-r--r--drivers/edac/synopsys_edac.c44
-rw-r--r--drivers/gpio/gpio-pca953x.c22
-rw-r--r--drivers/gpio/gpio-xilinx.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c16
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c27
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c88
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h45
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c455
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c56
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h7
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c6
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c6
-rw-r--r--drivers/gpu/drm/tiny/simpledrm.c2
-rw-r--r--drivers/hwmon/k10temp.c12
-rw-r--r--drivers/i2c/busses/i2c-cadence.c30
-rw-r--r--drivers/i2c/busses/i2c-imx.c2
-rw-r--r--drivers/i2c/busses/i2c-mlxcpld.c2
-rw-r--r--drivers/idle/intel_idle.c33
-rw-r--r--drivers/infiniband/hw/irdma/cm.c50
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c1
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c1
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h1
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c4
-rw-r--r--drivers/md/raid5.c4
-rw-r--r--drivers/misc/lkdtm/Makefile9
-rw-r--r--drivers/mmc/host/sdhci-omap.c14
-rw-r--r--drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c6
-rw-r--r--drivers/net/amt.c243
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c1
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c18
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c5
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c16
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-spi.c10
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c6
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c10
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c31
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_rx.c5
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_tx.c20
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_txrx.h6
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c30
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c17
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h14
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c11
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c7
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c65
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c40
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c106
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_flower.c6
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c9
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c112
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c2
-rw-r--r--drivers/net/ethernet/sfc/ptp.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c56
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c12
-rw-r--r--drivers/net/ipa/ipa_qmi_msg.h2
-rw-r--r--drivers/net/macsec.c33
-rw-r--r--drivers/net/pcs/pcs-xpcs.c2
-rw-r--r--drivers/net/sungem_phy.c1
-rw-r--r--drivers/net/usb/r8152.c16
-rw-r--r--drivers/net/virtio_net.c37
-rw-r--r--drivers/nvme/host/pci.c2
-rw-r--r--drivers/pinctrl/Kconfig2
-rw-r--r--drivers/pinctrl/mvebu/pinctrl-armada-37xx.c65
-rw-r--r--drivers/pinctrl/pinctrl-ocelot.c214
-rw-r--r--drivers/pinctrl/ralink/pinctrl-ralink.c2
-rw-r--r--drivers/pinctrl/sunplus/sppctl.c3
-rw-r--r--drivers/ptp/Kconfig1
-rw-r--r--drivers/s390/net/qeth_core_main.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c1
-rw-r--r--drivers/scsi/scsi_ioctl.c2
-rw-r--r--drivers/spi/spi-bcm2835.c12
-rw-r--r--drivers/spi/spi-cadence.c2
-rw-r--r--drivers/spi/spi-rspi.c4
-rw-r--r--drivers/ufs/core/ufshcd.c58
-rw-r--r--drivers/ufs/host/ufshcd-pltfrm.c15
-rw-r--r--drivers/virt/coco/sev-guest/sev-guest.c9
-rw-r--r--fs/attr.c74
-rw-r--r--fs/dlm/Kconfig9
-rw-r--r--fs/dlm/Makefile2
-rw-r--r--fs/dlm/ast.c4
-rw-r--r--fs/dlm/config.c21
-rw-r--r--fs/dlm/config.h3
-rw-r--r--fs/dlm/dlm_internal.h32
-rw-r--r--fs/dlm/lock.c143
-rw-r--r--fs/dlm/lock.h17
-rw-r--r--fs/dlm/lockspace.c31
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/member.c30
-rw-r--r--fs/dlm/plock.c51
-rw-r--r--fs/dlm/recoverd.c35
-rw-r--r--fs/dlm/user.c21
-rw-r--r--fs/erofs/compress.h2
-rw-r--r--fs/erofs/data.c39
-rw-r--r--fs/erofs/decompressor.c18
-rw-r--r--fs/erofs/decompressor_lzma.c1
-rw-r--r--fs/erofs/dir.c20
-rw-r--r--fs/erofs/zdata.c797
-rw-r--r--fs/erofs/zdata.h119
-rw-r--r--fs/erofs/zpvec.h159
-rw-r--r--fs/ext2/inode.c8
-rw-r--r--fs/ext2/super.c18
-rw-r--r--fs/ext4/inode.c14
-rw-r--r--fs/f2fs/file.c22
-rw-r--r--fs/f2fs/recovery.c10
-rw-r--r--fs/fat/file.c9
-rw-r--r--fs/io_uring.c14
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/ksmbd/vfs.c2
-rw-r--r--fs/ksmbd/vfs.h2
-rw-r--r--fs/locks.c77
-rw-r--r--fs/notify/fanotify/fanotify.c19
-rw-r--r--fs/notify/fanotify/fanotify.h2
-rw-r--r--fs/notify/fanotify/fanotify_user.c110
-rw-r--r--fs/notify/fdinfo.c6
-rw-r--r--fs/notify/fsnotify.c23
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ntfs/attrib.c8
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/slot_map.c46
-rw-r--r--fs/ocfs2/super.c21
-rw-r--r--fs/open.c60
-rw-r--r--fs/overlayfs/copy_up.c4
-rw-r--r--fs/overlayfs/inode.c87
-rw-r--r--fs/overlayfs/overlayfs.h15
-rw-r--r--fs/overlayfs/super.c25
-rw-r--r--fs/posix_acl.c168
-rw-r--r--fs/quota/dquot.c17
-rw-r--r--fs/read_write.c3
-rw-r--r--fs/reiserfs/inode.c16
-rw-r--r--fs/userfaultfd.c12
-rw-r--r--fs/xattr.c25
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/zonefs/super.c2
-rw-r--r--include/asm-generic/io.h2
-rw-r--r--include/asm-generic/tlb.h68
-rw-r--r--include/drm/gpu_scheduler.h4
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/evm.h6
-rw-r--r--include/linux/fanotify.h14
-rw-r--r--include/linux/fs.h140
-rw-r--r--include/linux/fsnotify_backend.h89
-rw-r--r--include/linux/mm.h14
-rw-r--r--include/linux/mnt_idmapping.h305
-rw-r--r--include/linux/pci_ids.h3
-rw-r--r--include/linux/posix_acl.h1
-rw-r--r--include/linux/posix_acl_xattr.h34
-rw-r--r--include/linux/quotaops.h15
-rw-r--r--include/linux/security.h8
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/xattr.h2
-rw-r--r--include/net/addrconf.h3
-rw-r--r--include/net/amt.h20
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/inet_hashtables.h2
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/protocol.h4
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sock.h8
-rw-r--r--include/net/tcp.h22
-rw-r--r--include/net/udp.h4
-rw-r--r--include/trace/events/dlm.h118
-rw-r--r--include/uapi/asm-generic/fcntl.h2
-rw-r--r--include/uapi/linux/fanotify.h8
-rw-r--r--include/uapi/linux/kvm.h2
-rw-r--r--kernel/configs/x86_debug.config3
-rw-r--r--kernel/locking/rwsem.c30
-rw-r--r--kernel/rcu/srcutree.c98
-rw-r--r--kernel/sched/deadline.c5
-rw-r--r--kernel/watch_queue.c103
-rw-r--r--kernel/workqueue.c5
-rw-r--r--mm/gup.c6
-rw-r--r--mm/hmm.c19
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/kfence/core.c18
-rw-r--r--mm/memory.c9
-rw-r--r--mm/memremap.c6
-rw-r--r--mm/page_alloc.c12
-rw-r--r--mm/secretmem.c33
-rw-r--r--mm/shmem.c7
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/l2cap_core.c61
-rw-r--r--net/bluetooth/mgmt.c1
-rw-r--r--net/bridge/br_netlink.c8
-rw-r--r--net/caif/caif_socket.c20
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/secure_seq.c4
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/dsa/port.c7
-rw-r--r--net/dsa/switch.c1
-rw-r--r--net/ipv4/af_inet.c18
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c49
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_input.c37
-rw-r--r--net/ipv4/ip_sockglue.c8
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/syncookies.c9
-rw-r--r--net/ipv4/sysctl_net_ipv4.c65
-rw-r--r--net/ipv4/tcp.c36
-rw-r--r--net/ipv4/tcp_fastopen.c9
-rw-r--r--net/ipv4/tcp_input.c92
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv4/tcp_metrics.c13
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c56
-rw-r--r--net/ipv4/tcp_recovery.c6
-rw-r--r--net/ipv4/tcp_timer.c30
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ip6_input.c23
-rw-r--r--net/ipv6/mcast.c14
-rw-r--r--net/ipv6/ping.c6
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp.c9
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mptcp/options.c2
-rw-r--r--net/mptcp/protocol.c8
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c7
-rw-r--r--net/netfilter/nft_queue.c27
-rw-r--r--net/sched/cls_api.c16
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/stream.c19
-rw-r--r--net/sctp/stream_sched.c2
-rw-r--r--net/smc/smc_llc.c2
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_device.c15
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--net/xfrm/xfrm_state.c2
-rw-r--r--scripts/gdb/linux/symbols.py2
-rw-r--r--security/integrity/evm/evm_main.c12
-rw-r--r--security/integrity/ima/ima_policy.c4
-rw-r--r--security/security.c5
-rw-r--r--sound/soc/rockchip/rockchip_i2s.c160
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h11
-rw-r--r--tools/include/uapi/linux/kvm.h2
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py34
-rw-r--r--tools/perf/util/bpf-loader.c18
-rw-r--r--tools/perf/util/symbol-elf.c56
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c8
399 files changed, 5656 insertions, 3544 deletions
diff --git a/.mailmap b/.mailmap
index 13e4f504e17f..71577c396252 100644
--- a/.mailmap
+++ b/.mailmap
@@ -60,6 +60,10 @@ Arnd Bergmann <arnd@arndb.de>
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
Ben Gardner <bgardner@wabtec.com>
@@ -135,6 +139,8 @@ Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com>
Frank Zago <fzago@systemfabricworks.com>
Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com>
@@ -371,6 +377,7 @@ Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
Sebastian Reichel <sre@kernel.org> <sre@debian.org>
Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f2d26cb7e853..cc3ea8febc62 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3176,6 +3176,7 @@
no_entry_flush [PPC]
no_uaccess_flush [PPC]
mmio_stale_data=off [X86]
+ retbleed=off [X86]
Exceptions:
This does not have any effect on
@@ -3198,6 +3199,7 @@
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
mmio_stale_data=full,nosmt [X86]
+ retbleed=auto,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -5796,6 +5798,24 @@
expediting. Set to zero to disable automatic
expediting.
+ srcutree.srcu_max_nodelay [KNL]
+ Specifies the number of no-delay instances
+ per jiffy for which the SRCU grace period
+ worker thread will be rescheduled with zero
+ delay. Beyond this limit, the worker thread will
+ be rescheduled with a sleep delay of one jiffy.
+
+ srcutree.srcu_max_nodelay_phase [KNL]
+ Specifies the maximum number of non-sleeping polls
+ of readers per grace-period phase. Beyond this limit,
+ the grace-period worker thread will be rescheduled
+ with a sleep delay of one jiffy between each rescan
+ of the readers for that grace-period phase.
+
+ srcutree.srcu_retry_check_delay [KNL]
+ Specifies the number of microseconds of non-sleeping
+ delay between each non-sleeping poll of readers.
+
srcutree.small_contention_lim [KNL]
Specifies the number of update-side contention
events per jiffy will be tolerated before
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index aec2cd2aaea7..19754beb5a4e 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of the ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, the ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
index ec575e72d0b2..bf28ac0401f3 100644
--- a/Documentation/core-api/protection-keys.rst
+++ b/Documentation/core-api/protection-keys.rst
@@ -4,31 +4,29 @@
Memory Protection Keys
======================
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
-which is found on Intel's Skylake (and later) "Scalable Processor"
-Server CPUs. It will be available in future non-server Intel parts
-and future AMD processors.
-
-For anyone wishing to test or use this feature, it is available in
-Amazon's EC2 C5 instances and is known to work there using an Ubuntu
-17.04 image.
-
-Memory Protection Keys provides a mechanism for enforcing page-based
-protections, but without requiring modification of the page tables
-when an application changes protection domains. It works by
-dedicating 4 previously ignored bits in each page table entry to a
-"protection key", giving 16 possible keys.
-
-There is also a new user-accessible register (PKRU) with two separate
-bits (Access Disable and Write Disable) for each key. Being a CPU
-register, PKRU is inherently thread-local, potentially giving each
+Memory Protection Keys provide a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables when an
+application changes protection domains.
+
+Pkeys Userspace (PKU) is a feature which can be found on:
+ * Intel server CPUs, Skylake and later
+ * Intel client CPUs, Tiger Lake (11th Gen Core) and later
+ * Future AMD CPUs
+
+Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
+a "protection key", giving 16 possible keys.
+
+Protections for each key are defined with a per-CPU user-accessible register
+(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
+and Write Disable) for each of 16 keys.
+
+Being a CPU register, PKRU is inherently thread-local, potentially giving each
thread a different set of protections from every other thread.
-There are two new instructions (RDPKRU/WRPKRU) for reading and writing
-to the new register. The feature is only available in 64-bit mode,
-even though there is theoretically space in the PAE PTEs. These
-permissions are enforced on data access only and have no effect on
-instruction fetches.
+There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
+register. The feature is only available in 64-bit mode, even though there is
+theoretically space in the PAE PTEs. These permissions are enforced on data
+access only and have no effect on instruction fetches.
Syscalls
========
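For illustration only (not part of the patch above): a minimal userspace sketch of
the pkey flow the rewritten text describes, using the pkey_alloc()/pkey_mprotect()/
pkey_set() wrappers assumed to be available in glibc 2.27 and later; error handling
is abbreviated::

  /* Sketch: protect a page with a key whose Write Disable bit is set, then
   * re-enable writes for this thread by updating PKRU via pkey_set(),
   * without touching the page tables again.
   */
  #define _GNU_SOURCE
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          long pagesz = sysconf(_SC_PAGESIZE);
          char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
          if (p == MAP_FAILED)
                  return 1;

          int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
          if (pkey < 0)
                  return 1;       /* CPU or kernel without pkeys support */

          if (pkey_mprotect(p, pagesz, PROT_READ | PROT_WRITE, pkey))
                  return 1;

          /* A write to p[0] here would fault: this thread's PKRU has WD set. */

          pkey_set(pkey, 0);      /* clear Access/Write Disable for this thread */
          p[0] = 1;               /* now allowed */

          pkey_free(pkey);
          return 0;
  }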
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 4f15463611f8..170cd201adc2 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -167,70 +167,65 @@ properties:
- in-band-status
fixed-link:
- allOf:
- - if:
- type: array
- then:
- deprecated: true
- items:
- - minimum: 0
- maximum: 31
- description:
- Emulated PHY ID, choose any but unique to the all
- specified fixed-links
-
- - enum: [0, 1]
- description:
- Duplex configuration. 0 for half duplex or 1 for
- full duplex
-
- - enum: [10, 100, 1000, 2500, 10000]
- description:
- Link speed in Mbits/sec.
-
- - enum: [0, 1]
- description:
- Pause configuration. 0 for no pause, 1 for pause
-
- - enum: [0, 1]
- description:
- Asymmetric pause configuration. 0 for no asymmetric
- pause, 1 for asymmetric pause
-
-
- - if:
- type: object
- then:
- properties:
- speed:
- description:
- Link speed.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [10, 100, 1000, 2500, 10000]
-
- full-duplex:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that full-duplex is used. When absent, half
- duplex is assumed.
-
- pause:
- $ref: /schemas/types.yaml#definitions/flag
- description:
- Indicates that pause should be enabled.
-
- asym-pause:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that asym_pause should be enabled.
-
- link-gpios:
- maxItems: 1
- description:
- GPIO to determine if the link is up
-
- required:
- - speed
+ oneOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ deprecated: true
+ items:
+ - minimum: 0
+ maximum: 31
+ description:
+ Emulated PHY ID, choose any but unique to the all
+ specified fixed-links
+
+ - enum: [0, 1]
+ description:
+ Duplex configuration. 0 for half duplex or 1 for
+ full duplex
+
+ - enum: [10, 100, 1000, 2500, 10000]
+ description:
+ Link speed in Mbits/sec.
+
+ - enum: [0, 1]
+ description:
+ Pause configuration. 0 for no pause, 1 for pause
+
+ - enum: [0, 1]
+ description:
+ Asymmetric pause configuration. 0 for no asymmetric
+ pause, 1 for asymmetric pause
+ - type: object
+ additionalProperties: false
+ properties:
+ speed:
+ description:
+ Link speed.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [10, 100, 1000, 2500, 10000]
+
+ full-duplex:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that full-duplex is used. When absent, half
+ duplex is assumed.
+
+ pause:
+ $ref: /schemas/types.yaml#definitions/flag
+ description:
+ Indicates that pause should be enabled.
+
+ asym-pause:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that asym_pause should be enabled.
+
+ link-gpios:
+ maxItems: 1
+ description:
+ GPIO to determine if the link is up
+
+ required:
+ - speed
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
index daa2f79a294f..1b1853062cd3 100644
--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -183,6 +183,7 @@ properties:
Should specify the gpio for phy reset.
phy-reset-duration:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Reset duration in milliseconds. Should present only if property
@@ -191,12 +192,14 @@ properties:
and 1 millisecond will be used instead.
phy-reset-active-high:
+ type: boolean
deprecated: true
description:
If present then the reset sequence using the GPIO specified in the
"phy-reset-gpios" property is reversed (H=reset state, L=operation state).
phy-reset-post-delay:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 316cfd8b1891..7da6c30ed596 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -466,10 +466,6 @@ overlay filesystem and the value of st_ino for filesystem objects may not be
persistent and could change even while the overlay filesystem is mounted, as
summarized in the `Inode properties`_ table above.
-4) "idmapped mounts"
-When the upper or lower layers are idmapped mounts overlayfs will be mounted
-without support for POSIX Access Control Lists (ACLs). This limitation will
-eventually be lifted.
Changes to underlying filesystems
---------------------------------
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index ed7fa76e7a40..d742ba6bd211 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc.
Driver development
==================
-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
contain the various members described below.
-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------
-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+ to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+ method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+ retrieved in all further DSA method callbacks.
+
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. Using platform data,
+a single switch and a single switch tree is permitted.
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.
Switch configuration
--------------------
-- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+ supported, should be a valid value from the ``dsa_tag_protocol`` enum.
+ The returned information does not have to be static; the driver is passed the
+ CPU port number, as well as the tagging protocol of a possibly stacked
+ upstream switch, in case there are hardware limitations in terms of supported
+ tag formats.
-- ``probe``: probe routine which will be invoked by the DSA platform device upon
- registration to test for the presence/absence of a switch device. For MDIO
- devices, it is recommended to issue a read towards internal registers using
- the switch pseudo-PHY and return whether this is a supported device. For other
- buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+ problems with the master or other issues, the driver may support changing it
+ at runtime, either through a device tree property or through sysfs. In that
+ case, further calls to ``get_tag_protocol`` should report the protocol in
+ current use.
- ``setup``: setup function for the switch, this function is responsible for setting
up the ``dsa_switch_ops`` private structure with all it needs: register maps,
@@ -535,7 +617,17 @@ Switch configuration
fully configured and ready to serve any kind of request. It is recommended
to issue a software reset of the switch during this setup function in order to
avoid relying on what a previous software agent such as a bootloader/firmware
- may have previously configured.
+ may have previously configured. The method responsible for undoing any
+ applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+ destruction of per-port data structures. It is mandatory for some operations
+ such as registering and unregistering devlink port regions to be done from
+ these methods, otherwise they are optional. A port will be torn down only if
+ it has been previously set up. It is possible for a port to be set up during
+ probing only to be torn down immediately afterwards, for example in case its
+ PHY cannot be found. In this case, probing of the DSA switch continues
+ without that particular port.
PHY devices and link management
-------------------------------
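As an illustration of the probing and lifetime rules just described, a hypothetical
platform-bus skeleton (not taken from any in-tree driver) might look as follows;
struct foo_priv, foo_switch_ops and FOO_NUM_PORTS are placeholders::

  #include <linux/platform_device.h>
  #include <net/dsa.h>

  static int foo_probe(struct platform_device *pdev)
  {
          struct foo_priv *priv;
          struct dsa_switch *ds;

          priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
          ds = devm_kzalloc(&pdev->dev, sizeof(*ds), GFP_KERNEL);
          if (!priv || !ds)
                  return -ENOMEM;

          ds->dev = &pdev->dev;           /* used to parse OF node/platform data */
          ds->num_ports = FOO_NUM_PORTS;
          ds->ops = &foo_switch_ops;      /* the dsa_switch_ops implementation */
          ds->priv = priv;                /* retrievable from every DSA callback */
          priv->ds = ds;

          platform_set_drvdata(pdev, priv);
          return dsa_register_switch(ds);
  }

  static int foo_remove(struct platform_device *pdev)
  {
          struct foo_priv *priv = platform_get_drvdata(pdev);

          if (!priv)                      /* shutdown() already ran */
                  return 0;

          dsa_unregister_switch(priv->ds);
          platform_set_drvdata(pdev, NULL);
          return 0;
  }

  static void foo_shutdown(struct platform_device *pdev)
  {
          struct foo_priv *priv = platform_get_drvdata(pdev);

          if (!priv)                      /* remove() already ran */
                  return;

          /* Minimal teardown so the master's driver can unbind cleanly. */
          dsa_switch_shutdown(priv->ds);
          platform_set_drvdata(pdev, NULL);
  }

Drivers on SPI, I2C or MDIO buses follow the same pattern with their respective
bus' set_drvdata/get_drvdata helpers.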
@@ -635,26 +727,198 @@ Power management
``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries, however not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as standalone port
+(packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port, therefore
+it would consume useless space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and just program the hardware with FDB entries pointing towards the CPU port
+for which it is known that software is interested in those MAC addresses.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+ associated with the port private database of the respective user port,
+ and the driver is notified to install them through ``port_fdb_add`` towards
+ the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+ through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+ with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+ addresses of the bridge ports, for which packets must be terminated locally
+ and not forwarded. They are associated with the address database for that
+ bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+ present in the same bridge as some DSA switch ports. These are also
+ associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+ bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+ is set to true by the driver. These are associated with the address database
+ for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+ the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+ ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+ databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+ Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id``) or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The down side is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
+
Bridge layer
------------
+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may
+be non-zero and exceeded, and in this case, joining a bridge port is still
+possible, but the packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. have all bridging service functions (address
+learning etc) disabled, and send all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports part of the same tree ID to be part of the same bridge forwarding
+domain (capable of autonomous forwarding to each other).
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible to inject packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
+
- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should do what's necessary at the switch
level to permit the joining port to be added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
+ By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+ of this bridge is also offloaded.
- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should do what's necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
- remaining bridge members. When the port leaves the bridge, it should be aged
- out at the switch hardware for the switch to (re) learn MAC addresses behind
- this port.
+ remaining bridge members.
- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
- hardware to forward/block/learn traffic. The switch driver is responsible for
- computing a STP state change based on current and asked parameters and perform
- the relevant ageing based on the intersection results
+ hardware to forward/block/learn traffic.
- ``port_bridge_flags``: bridge layer function invoked when a port must
configure its settings for e.g. flooding of unknown traffic or source address
@@ -667,21 +931,11 @@ Bridge layer
CPU port, and flooding towards the CPU port should also be enabled, due to a
lack of an explicit address filtering mechanism in the DSA core.
-- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
- ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
- a non-zero value. Returning success in this function activates the TX
- forwarding offload bridge feature for this port, which enables the tagging
- protocol driver to inject data plane packets towards the bridging domain that
- the port is a part of. Data plane packets are subject to FDB lookup, hardware
- learning on the CPU port, and do not override the port STP state.
- Additionally, replication of data plane packets (multicast, flooding) is
- handled in hardware and the bridge driver will transmit a single skb for each
- packet that needs replication. The method is provided as a configuration
- point for drivers that need to configure the hardware for enabling this
- feature.
-
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
- leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+ dynamically learned FDB entries on the port is necessary. This is called when
+ transitioning from an STP state where learning should take place to an STP
+ state where it shouldn't, or when leaving a bridge, or when address learning
+ is turned off via ``port_bridge_flags``.
Bridge VLAN filtering
---------------------
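A hedged sketch of how a driver that sets ``ds->fdb_isolation`` might act on the
``dsa_db`` argument in ``port_fdb_add``; the foo_* helpers are hypothetical stand-ins
for hardware-specific FID programming, and the exact callback signature should be
checked against ``include/net/dsa.h`` for the kernel version in use::

  static int foo_port_fdb_add(struct dsa_switch *ds, int port,
                              const unsigned char *addr, u16 vid,
                              struct dsa_db db)
  {
          struct foo_priv *priv = ds->priv;
          u16 fid;

          switch (db.type) {
          case DSA_DB_PORT:
                  /* Standalone traffic: the user port's private database */
                  fid = foo_port_private_fid(priv, db.dp->index);
                  break;
          case DSA_DB_BRIDGE:
                  /* VLAN-unaware database for VID 0, per-VID database otherwise */
                  fid = foo_bridge_fid(priv, db.bridge.num, vid);
                  break;
          default:
                  return -EOPNOTSUPP;     /* DSA_DB_LAG is currently unused */
          }

          return foo_fdb_write(priv, fid, addr, vid, BIT(port));
  }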
@@ -697,55 +951,44 @@ Bridge VLAN filtering
allowed.
- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
- (tagged or untagged) for the given switch port. If the operation is not
- supported by the hardware, this function should return ``-EOPNOTSUPP`` to
- inform the bridge code to fallback to a software implementation.
+ (tagged or untagged) for the given switch port. The CPU port becomes a member
+ of a VLAN only if a foreign bridge port is also a member of it (and
+ forwarding needs to take place in software), or the VLAN is installed to the
+ VLAN group of the bridge device itself, for termination purposes
+ (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+ reference counted and removed when there is no user left. Drivers do not need
+ to manually install a VLAN on the CPU port.
- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each VLAN the given port is a member
- of. A switchdev object is used to carry the VID and bridge flags.
-
- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
- associated with this VLAN ID. If the operation is not supported, this
- function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
- a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
+ associated with this VLAN ID.
- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+ physical DSA port interfaces. Since DSA does not attempt to keep in sync its
+ hardware FDB entries with the software bridge, this method is implemented as
+ a means to view the entries visible on user ports in the hardware database.
+ The entries reported by this function have the ``self`` flag in the output of
+ the ``bridge fdb show`` command.
- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
- a multicast database entry. If the operation is not supported, this function
- should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
- software implementation. The switch hardware should be programmed with the
+ a multicast database entry. The switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
-
- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and MDB info.
-
Link aggregation
----------------
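And a similarly hypothetical sketch of the ``port_fdb_dump`` bridge-bypass path
described above, where the driver walks its hardware table and reports each entry
through the callback supplied by the DSA core (feeding ``ndo_fdb_dump`` and the
``self`` entries of ``bridge fdb show``); foo_for_each_fdb_entry() and
struct foo_fdb_entry are illustrative only::

  static int foo_port_fdb_dump(struct dsa_switch *ds, int port,
                               dsa_fdb_dump_cb_t *cb, void *data)
  {
          struct foo_priv *priv = ds->priv;
          struct foo_fdb_entry e;
          int err;

          foo_for_each_fdb_entry(priv, &e) {
                  if (!(e.port_mask & BIT(port)))
                          continue;

                  /* addr/vid/is_static are read back from the hardware table */
                  err = cb(e.addr, e.vid, e.is_static, data);
                  if (err)
                          return err;     /* e.g. netlink buffer full */
          }

          return 0;
  }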
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b3a534ed0e7c..d7a1bf1a55b5 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER
Default: 4K
udp_wmem_min - INTEGER
- Minimal size of send buffer used by UDP sockets in moderation.
- Each UDP socket is able to use the size for sending data, even if
- total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
- Default: 4K
+ UDP does not have tx memory accounting and this tunable has no effect.
RAW variables
=============
@@ -2870,7 +2866,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
Default: 4K
sctp_wmem - vector of 3 INTEGERs: min, default, max
- Currently this tunable has no effect.
+ Only the first value ("min") is used, "default" and "max" are
+ ignored.
+
+ min: Minimum size of send buffer that can be used by SCTP sockets.
+ It is guaranteed to each SCTP socket (but not association) even
+ under moderate memory pressure.
+
+ Default: 4K
addr_scope_policy - INTEGER
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 6e090fb96a0e..98a283930307 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5658,7 +5658,7 @@ by a string of size ``name_size``.
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
- #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+ #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/MAINTAINERS b/MAINTAINERS
index 651616ed8ae2..e10b1835e5d7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7485,6 +7485,8 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
+R: Yue Hu <huyue2@coolpad.com>
+R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
@@ -9618,6 +9620,7 @@ F: drivers/input/misc/ideapad_slidebar.c
IDMAPPED MOUNTS
M: Christian Brauner <brauner@kernel.org>
+M: Seth Forshee <sforshee@kernel.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
@@ -15849,7 +15852,7 @@ PIN CONTROLLER - FREESCALE
M: Dong Aisheng <aisheng.dong@nxp.com>
M: Fabio Estevam <festevam@gmail.com>
M: Shawn Guo <shawnguo@kernel.org>
-M: Stefan Agner <stefan@agner.ch>
+M: Jacky Bai <ping.bai@nxp.com>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
L: linux-gpio@vger.kernel.org
S: Maintained
diff --git a/Makefile b/Makefile
index 00fd80c5dd6e..df92892325ae 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Superb Owl
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index fcf9a41a4ef5..71b9272acb28 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE
config MMU_GATHER_NO_RANGE
bool
+ select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+ bool
+
+config MMU_GATHER_MERGE_VMAS
+ bool
config MMU_GATHER_NO_GATHER
bool
diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi
index 3cb02fffe716..38e90a31d2dd 100644
--- a/arch/arm/boot/dts/lan966x.dtsi
+++ b/arch/arm/boot/dts/lan966x.dtsi
@@ -38,7 +38,7 @@
sys_clk: sys_clk {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <162500000>;
+ clock-frequency = <165625000>;
};
cpu_clk: cpu_clk {
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index a81dda65c576..45180a2cc47c 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -10,7 +10,7 @@
#else
#define MAX_DMA_ADDRESS ({ \
extern phys_addr_t arm_dma_zone_size; \
- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
+ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
#endif
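The constant change above reads like a missing-zero fix: 0x10000000 is 256 MiB, while the
expression only makes sense with a 4 GiB bound so that PAGE_OFFSET plus the DMA zone size
stays below 2^32. A small userspace check, assuming the common 3G/1G ARM split where
PAGE_OFFSET is 0xC0000000, shows the two thresholds:

#include <stdio.h>
#include <stdint.h>

#define PAGE_OFFSET 0xC0000000u   /* assumed value, typical 3G/1G split */

int main(void)
{
    /* Old literal: 256 MiB; in 32-bit arithmetic the subtraction wraps to 0x50000000. */
    uint32_t old_limit = 0x10000000u - PAGE_OFFSET;

    /* New literal: 4 GiB, i.e. the space actually left below 2^32 above PAGE_OFFSET. */
    uint64_t new_limit = 0x100000000ULL - PAGE_OFFSET;

    printf("old threshold: 0x%08x\n", old_limit);                         /* 0x50000000 */
    printf("new threshold: 0x%08llx\n", (unsigned long long)new_limit);   /* 0x40000000 */
    return 0;
}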
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index b5e8b9ae4c7d..7fd3600db8ef 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -40,8 +40,8 @@ ENDPROC(_find_first_zero_bit_le)
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_zero_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
@@ -81,8 +81,8 @@ ENDPROC(_find_first_bit_le)
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
@@ -115,8 +115,8 @@ ENTRY(_find_first_zero_bit_be)
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
@@ -149,8 +149,8 @@ ENTRY(_find_first_bit_be)
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
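The new cmp r2, r1 / bhs 3b sequences bound-check the starting offset (r2) against maxbit
(r1) before entering the byte loop, instead of only rejecting a zero maxbit. A plain-C
reference of the intended semantics (a simplified sketch, not the kernel's actual
implementation) might look like:

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Return the index of the next set bit at or after "offset", or "maxbit" if none is
 * found or if offset is already out of range -- the case the new cmp/bhs guard handles. */
static unsigned int ref_find_next_bit(const unsigned long *addr,
                                      unsigned int maxbit, unsigned int offset)
{
    if (offset >= maxbit)
        return maxbit;
    for (; offset < maxbit; offset++)
        if (addr[offset / BITS_PER_LONG] & (1UL << (offset % BITS_PER_LONG)))
            return offset;
    return maxbit;
}

int main(void)
{
    unsigned long map[1] = { 0x90 };                /* bits 4 and 7 set */

    printf("%u\n", ref_find_next_bit(map, 32, 5));  /* 7 */
    printf("%u\n", ref_find_next_bit(map, 32, 40)); /* 32: offset past maxbit */
    return 0;
}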
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index c546356d0f02..5738496717e2 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -549,7 +549,7 @@ static struct pxa2xx_spi_controller corgi_spi_info = {
};
static struct gpiod_lookup_table corgi_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 2ae06edf413c..2fd665944103 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 753fe166ab68..624088257cfc 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = {
};
static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
- .dev_id = "pxa2xx-spi.3",
+ .dev_id = "spi3",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
@@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
};
static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
- .dev_id = "pxa2xx-spi.4",
+ .dev_id = "spi4",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index f98dc61e87af..98423a96f440 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -207,7 +207,7 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
};
static struct gpiod_lookup_table littleton_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 20456a55c4c5..0827ebca1d38 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -994,7 +994,7 @@ static struct pxa2xx_spi_controller magician_spi_info = {
};
static struct gpiod_lookup_table magician_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index dd88953adc9d..9964729cd428 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spitz_spi_info = {
};
static struct gpiod_lookup_table spitz_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index d03520555497..c4d4162a7e6e 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
};
static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
@@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/csky/include/asm/tlb.h b/arch/csky/include/asm/tlb.h
index 3498e65f59f8..702861c68874 100644
--- a/arch/csky/include/asm/tlb.h
+++ b/arch/csky/include/asm/tlb.h
@@ -4,21 +4,6 @@
#define __ASM_CSKY_TLB_H
#include <asm/cacheflush.h>
-
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* __ASM_CSKY_TLB_H */
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 53a912befb62..62b5b07fa4e1 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -69,7 +69,6 @@ config LOONGARCH
select GENERIC_TIME_VSYSCALL
select GPIOLIB
select HAVE_ARCH_AUDITSYSCALL
- select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -108,6 +107,7 @@ config LOONGARCH
select TRACE_IRQFLAGS_SUPPORT
select USE_PERCPU_NUMA_NODE_ID
select ZONE_DMA32
+ select MMU_GATHER_MERGE_VMAS if MMU
config 32BIT
bool
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index a1a04083bd67..be037a40580d 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -274,16 +274,4 @@
nor \dst, \src, zero
.endm
-.macro bgt r0 r1 label
- blt \r1, \r0, \label
-.endm
-
-.macro bltz r0 label
- blt \r0, zero, \label
-.endm
-
-.macro bgez r0 label
- bge \r0, zero, \label
-.endm
-
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index 979367ad4e2c..6b9aca9ab6e9 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -10,7 +10,6 @@
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
#if __SIZEOF_LONG__ == 4
#define __LL "ll.w "
@@ -157,27 +156,25 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" addi.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" sub.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
@@ -320,27 +317,25 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v)
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" addi.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" sub.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h
index b6517eeeb141..cda977675854 100644
--- a/arch/loongarch/include/asm/barrier.h
+++ b/arch/loongarch/include/asm/barrier.h
@@ -48,9 +48,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
__asm__ __volatile__(
"sltu %0, %1, %2\n\t"
#if (__SIZEOF_LONG__ == 4)
- "sub.w %0, $r0, %0\n\t"
+ "sub.w %0, $zero, %0\n\t"
#elif (__SIZEOF_LONG__ == 8)
- "sub.d %0, $r0, %0\n\t"
+ "sub.d %0, $zero, %0\n\t"
#endif
: "=r" (mask)
: "r" (index), "r" (size)
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index 75b3a4478652..0a9b0fac1eee 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -55,9 +55,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
__asm__ __volatile__( \
"1: " ld " %0, %2 # __cmpxchg_asm \n" \
" bne %0, %z3, 2f \n" \
- " or $t0, %z4, $zero \n" \
+ " move $t0, %z4 \n" \
" " st " $t0, %1 \n" \
- " beq $zero, $t0, 1b \n" \
+ " beqz $t0, 1b \n" \
"2: \n" \
__WEAK_LLSC_MB \
: "=&r" (__ret), "=ZB"(*m) \
diff --git a/arch/loongarch/include/asm/compiler.h b/arch/loongarch/include/asm/compiler.h
deleted file mode 100644
index 657cebe70ace..000000000000
--- a/arch/loongarch/include/asm/compiler.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_COMPILER_H
-#define _ASM_COMPILER_H
-
-#define GCC_OFF_SMALL_ASM() "ZC"
-
-#define LOONGARCH_ISA_LEVEL "loongarch"
-#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch"
-#define LOONGARCH_ISA_LEVEL_RAW loongarch
-#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW
-
-#endif /* _ASM_COMPILER_H */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index f3960b18a90e..5f3ff4781fda 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -288,8 +288,6 @@ struct arch_elf_state {
.interp_fp_abi = LOONGARCH_ABI_FP_ANY, \
}
-#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
-
extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state);
diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h
index 9de8231694ec..feb6658c84ff 100644
--- a/arch/loongarch/include/asm/futex.h
+++ b/arch/loongarch/include/asm/futex.h
@@ -8,7 +8,6 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/barrier.h>
-#include <asm/compiler.h>
#include <asm/errno.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
@@ -17,7 +16,7 @@
"1: ll.w %1, %4 # __futex_atomic_op\n" \
" " insn " \n" \
"2: sc.w $t0, %2 \n" \
- " beq $t0, $zero, 1b \n" \
+ " beqz $t0, 1b \n" \
"3: \n" \
" .section .fixup,\"ax\" \n" \
"4: li.w %0, %6 \n" \
@@ -82,9 +81,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
"# futex_atomic_cmpxchg_inatomic \n"
"1: ll.w %1, %3 \n"
" bne %1, %z4, 3f \n"
- " or $t0, %z5, $zero \n"
+ " move $t0, %z5 \n"
"2: sc.w $t0, %2 \n"
- " beq $zero, $t0, 1b \n"
+ " beqz $t0, 1b \n"
"3: \n"
__WEAK_LLSC_MB
" .section .fixup,\"ax\" \n"
@@ -95,8 +94,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
- : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+ : "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
+ : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory", "t0");
diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h
index 52121cd791fe..319a8c616f1f 100644
--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -9,7 +9,6 @@
#include <linux/compiler.h>
#include <linux/stringify.h>
-#include <asm/compiler.h>
#include <asm/loongarch.h>
static inline void arch_local_irq_enable(void)
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index 2052a2267337..65fbbae9fc4d 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -9,7 +9,6 @@
#include <linux/bitops.h>
#include <linux/atomic.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
typedef struct {
atomic_long_t a;
diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h
index 6a8038725ba7..6e8f6972ceb6 100644
--- a/arch/loongarch/include/asm/loongson.h
+++ b/arch/loongarch/include/asm/loongson.h
@@ -39,18 +39,6 @@ extern const struct plat_smp_ops loongson3_smp_ops;
#define MAX_PACKAGES 16
-/* Chip Config register of each physical cpu package */
-extern u64 loongson_chipcfg[MAX_PACKAGES];
-#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
-
-/* Chip Temperature register of each physical cpu package */
-extern u64 loongson_chiptemp[MAX_PACKAGES];
-#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
-
-/* Freq Control register of each physical cpu package */
-extern u64 loongson_freqctrl[MAX_PACKAGES];
-#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
-
#define xconf_readl(addr) readl(addr)
#define xconf_readq(addr) readq(addr)
@@ -58,7 +46,7 @@ static inline void xconf_writel(u32 val, volatile void __iomem *addr)
{
asm volatile (
" st.w %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val)
);
@@ -68,7 +56,7 @@ static inline void xconf_writeq(u64 val64, volatile void __iomem *addr)
{
asm volatile (
" st.d %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val64)
);
diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h
index 26483e396ad1..6b5c2a7aa706 100644
--- a/arch/loongarch/include/asm/stacktrace.h
+++ b/arch/loongarch/include/asm/stacktrace.h
@@ -23,13 +23,13 @@
static __always_inline void prepare_frametrace(struct pt_regs *regs)
{
__asm__ __volatile__(
- /* Save $r1 */
+ /* Save $ra */
STORE_ONE_REG(1)
- /* Use $r1 to save PC */
- "pcaddi $r1, 0\n\t"
- STR_LONG_S " $r1, %0\n\t"
- /* Restore $r1 */
- STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t"
+ /* Use $ra to save PC */
+ "pcaddi $ra, 0\n\t"
+ STR_LONG_S " $ra, %0\n\t"
+ /* Restore $ra */
+ STR_LONG_L " $ra, %1, "STR_LONGSIZE"\n\t"
STORE_ONE_REG(2)
STORE_ONE_REG(3)
STORE_ONE_REG(4)
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 99beb11c2fa8..b7dd9f19a5a9 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -44,14 +44,14 @@ struct thread_info {
}
/* How to get the thread information struct from C. */
-register struct thread_info *__current_thread_info __asm__("$r2");
+register struct thread_info *__current_thread_info __asm__("$tp");
static inline struct thread_info *current_thread_info(void)
{
return __current_thread_info;
}
-register unsigned long current_stack_pointer __asm__("$r3");
+register unsigned long current_stack_pointer __asm__("$sp");
#endif /* !__ASSEMBLY__ */
diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index 4f629ae9d5a9..dd24f5898f65 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr)
);
}
-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
static void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index 217c6a3727b1..2b44edc604a2 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -162,7 +162,7 @@ do { \
"2: \n" \
" .section .fixup,\"ax\" \n" \
"3: li.w %0, %3 \n" \
- " or %1, $r0, $r0 \n" \
+ " move %1, $zero \n" \
" b 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
index b38f5489d094..4662b06269f4 100644
--- a/arch/loongarch/kernel/cacheinfo.c
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -4,8 +4,9 @@
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
-#include <asm/cpu-info.h>
#include <linux/cacheinfo.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu-info.h>
/* Populates leaf and increments to next leaf */
#define populate_cache(cache, leaf, c_level, c_type) \
@@ -17,6 +18,8 @@ do { \
leaf->ways_of_associativity = c->cache.ways; \
leaf->size = c->cache.linesz * c->cache.sets * \
c->cache.ways; \
+ if (leaf->level > 2) \
+ leaf->size *= nodes_per_package; \
leaf++; \
} while (0)
@@ -95,11 +98,15 @@ static void cache_cpumap_setup(unsigned int cpu)
int populate_cache_leaves(unsigned int cpu)
{
- int level = 1;
+ int level = 1, nodes_per_package = 1;
struct cpuinfo_loongarch *c = &current_cpu_data;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ if (loongson_sysconf.nr_nodes > 1)
+ nodes_per_package = loongson_sysconf.cores_per_package
+ / loongson_sysconf.cores_per_node;
+
if (c->icache.waysize) {
populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
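The effect of the cacheinfo change above is that caches above level 2, which are shared
across the whole package, get scaled by the number of nodes in that package. A tiny
arithmetic sketch with made-up example values (not read from any real firmware table):

#include <stdio.h>

int main(void)
{
    /* Hypothetical stand-ins for the loongson_sysconf fields. */
    unsigned int cores_per_package = 16;
    unsigned int cores_per_node = 4;
    unsigned int nodes_per_package = cores_per_package / cores_per_node;   /* 4 */

    unsigned int l3_per_node_bytes = 16 * 1024 * 1024;   /* per-node L3 slice */

    /* Level > 2 caches are reported per package, so multiply by nodes_per_package. */
    printf("reported L3 size: %u MiB\n",
           nodes_per_package * l3_per_node_bytes / (1024 * 1024));          /* 64 MiB */
    return 0;
}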
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d5b3dbcf5425..d53b631c9022 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -27,7 +27,7 @@ SYM_FUNC_START(handle_syscall)
addi.d sp, sp, -PT_SIZE
cfi_st t2, PT_R3
- cfi_rel_offset sp, PT_R3
+ cfi_rel_offset sp, PT_R3
st.d zero, sp, PT_R0
csrrd t2, LOONGARCH_CSR_PRMD
st.d t2, sp, PT_PRMD
@@ -50,7 +50,7 @@ SYM_FUNC_START(handle_syscall)
cfi_st a7, PT_R11
csrrd ra, LOONGARCH_CSR_ERA
st.d ra, sp, PT_ERA
- cfi_rel_offset ra, PT_ERA
+ cfi_rel_offset ra, PT_ERA
cfi_st tp, PT_R2
cfi_st u0, PT_R21
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 467946ecf451..82b478a5c665 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -17,21 +17,6 @@ u64 efi_system_table;
struct loongson_system_configuration loongson_sysconf;
EXPORT_SYMBOL(loongson_sysconf);
-u64 loongson_chipcfg[MAX_PACKAGES];
-u64 loongson_chiptemp[MAX_PACKAGES];
-u64 loongson_freqctrl[MAX_PACKAGES];
-unsigned long long smp_group[MAX_PACKAGES];
-
-static void __init register_addrs_set(u64 *registers, const u64 addr, int num)
-{
- u64 i;
-
- for (i = 0; i < num; i++) {
- *registers = (i << 44) | addr;
- registers++;
- }
-}
-
void __init init_environ(void)
{
int efi_boot = fw_arg0;
@@ -50,11 +35,6 @@ void __init init_environ(void)
efi_memmap_init_early(&data);
memblock_reserve(data.phys_map & PAGE_MASK,
PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK)));
-
- register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16);
- register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16);
- register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16);
- register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16);
}
static int __init init_cpu_fullname(void)
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index a631a7137667..576b3370a296 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -27,78 +27,78 @@
.endm
.macro sc_save_fp base
- EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_restore_fp base
- EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_save_fcc base, tmp0, tmp1
movcf2gr \tmp0, $fcc0
- move \tmp1, \tmp0
+ move \tmp1, \tmp0
movcf2gr \tmp0, $fcc1
bstrins.d \tmp1, \tmp0, 15, 8
movcf2gr \tmp0, $fcc2
@@ -113,11 +113,11 @@
bstrins.d \tmp1, \tmp0, 55, 48
movcf2gr \tmp0, $fcc7
bstrins.d \tmp1, \tmp0, 63, 56
- EX st.d \tmp1, \base, 0
+ EX st.d \tmp1, \base, 0
.endm
.macro sc_restore_fcc base, tmp0, tmp1
- EX ld.d \tmp0, \base, 0
+ EX ld.d \tmp0, \base, 0
bstrpick.d \tmp1, \tmp0, 7, 0
movgr2cf $fcc0, \tmp1
bstrpick.d \tmp1, \tmp0, 15, 8
@@ -138,11 +138,11 @@
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
- EX st.w \tmp0, \base, 0
+ EX st.w \tmp0, \base, 0
.endm
.macro sc_restore_fcsr base, tmp0
- EX ld.w \tmp0, \base, 0
+ EX ld.w \tmp0, \base, 0
movgr2fcsr fcsr0, \tmp0
.endm
@@ -151,9 +151,9 @@
*/
SYM_FUNC_START(_save_fp)
fpu_save_csr a0 t1
- fpu_save_double a0 t1 # clobbers t1
+ fpu_save_double a0 t1 # clobbers t1
fpu_save_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
@@ -161,10 +161,10 @@ EXPORT_SYMBOL(_save_fp)
* Restore a thread's fp context.
*/
SYM_FUNC_START(_restore_fp)
- fpu_restore_double a0 t1 # clobbers t1
- fpu_restore_csr a0 t1
- fpu_restore_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ fpu_restore_double a0 t1 # clobbers t1
+ fpu_restore_csr a0 t1
+ fpu_restore_cc a0 t1 t2 # clobbers t1, t2
+ jr ra
SYM_FUNC_END(_restore_fp)
/*
@@ -216,7 +216,7 @@ SYM_FUNC_START(_init_fpu)
movgr2fr.d $f30, t1
movgr2fr.d $f31, t1
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_init_fpu)
/*
@@ -225,11 +225,11 @@ SYM_FUNC_END(_init_fpu)
* a2: fcsr
*/
SYM_FUNC_START(_save_fp_context)
- sc_save_fcc a1 t1 t2
- sc_save_fcsr a2 t1
- sc_save_fp a0
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_save_fcc a1 t1 t2
+ sc_save_fcsr a2 t1
+ sc_save_fp a0
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_save_fp_context)
/*
@@ -238,14 +238,14 @@ SYM_FUNC_END(_save_fp_context)
* a2: fcsr
*/
SYM_FUNC_START(_restore_fp_context)
- sc_restore_fp a0
- sc_restore_fcc a1 t1 t2
- sc_restore_fcsr a2 t1
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_restore_fp a0
+ sc_restore_fcc a1 t1 t2
+ sc_restore_fcsr a2 t1
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_restore_fp_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(fault)
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 93496852b3cc..75e5be807a0d 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -28,23 +28,23 @@ SYM_FUNC_START(__arch_cpu_idle)
nop
idle 0
/* end of rollback region */
-1: jirl zero, ra, 0
+1: jr ra
SYM_FUNC_END(__arch_cpu_idle)
SYM_FUNC_START(handle_vint)
BACKUP_T0T1
SAVE_ALL
la.abs t1, __arch_cpu_idle
- LONG_L t0, sp, PT_ERA
+ LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
xori t0, t0, 0x1f
bne t0, t1, 1f
- LONG_S t0, sp, PT_ERA
+ LONG_S t0, sp, PT_ERA
1: move a0, sp
move a1, sp
la.abs t0, do_vint
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_vint)
@@ -72,7 +72,7 @@ SYM_FUNC_END(except_vec_cex)
build_prep_\prep
move a0, sp
la.abs t0, do_\handler
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
.endm
@@ -91,5 +91,5 @@ SYM_FUNC_END(except_vec_cex)
SYM_FUNC_START(handle_sys)
la.abs t0, handle_syscall
- jirl zero, t0, 0
+ jr t0
SYM_FUNC_END(handle_sys)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index d01e62dd414f..7062cdf0e33e 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -32,7 +32,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point
/* We might not get launched at the address the kernel is linked to,
so we jump there. */
la.abs t0, 0f
- jirl zero, t0, 0
+ jr t0
0:
la t0, __bss_start # clear .bss
st.d zero, t0, 0
@@ -50,7 +50,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point
/* KSave3 used for percpu base, initialized as 0 */
csrwr zero, PERCPU_BASE_KS
/* GPR21 used for percpu base (runtime), initialized as 0 */
- or u0, zero, zero
+ move u0, zero
la tp, init_thread_union
/* Set the SP after an empty pt_regs. */
@@ -85,8 +85,8 @@ SYM_CODE_START(smpboot_entry)
ld.d sp, t0, CPU_BOOT_STACK
ld.d tp, t0, CPU_BOOT_TINFO
- la.abs t0, 0f
- jirl zero, t0, 0
+ la.abs t0, 0f
+ jr t0
0:
bl start_secondary
SYM_CODE_END(smpboot_entry)
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index e6ab87948e1d..dc2b82ea894c 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -193,7 +193,7 @@ static int fpr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t);
- const int fcc_end = fcc_start + sizeof(u64);
+ const int fcsr_start = fcc_start + sizeof(u64);
int err;
BUG_ON(count % sizeof(elf_fpreg_t));
@@ -209,10 +209,12 @@ static int fpr_set(struct task_struct *target,
if (err)
return err;
- if (count > 0)
- err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.fcc,
- fcc_start, fcc_end);
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcc, fcc_start,
+ fcc_start + sizeof(u64));
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcsr, fcsr_start,
+ fcsr_start + sizeof(u32));
return err;
}
diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c
index 2b86469e4718..800c965a17ea 100644
--- a/arch/loongarch/kernel/reset.c
+++ b/arch/loongarch/kernel/reset.c
@@ -13,7 +13,6 @@
#include <linux/console.h>
#include <acpi/reboot.h>
-#include <asm/compiler.h>
#include <asm/idle.h>
#include <asm/loongarch.h>
#include <asm/reboot.h>
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index c74860b53375..8f5c2f9a1a83 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -126,7 +126,7 @@ static void __init parse_bios_table(const struct dmi_header *dm)
char *dmi_data = (char *)dm;
bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET);
- b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET);
+ b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6;
if (bios_extern & LOONGSON_EFI_ENABLE)
set_bit(EFI_BOOT, &efi.flags);
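The bios_size change above decodes the SMBIOS Type 0 "BIOS ROM Size" byte, which encodes
the size as (n + 1) * 64 KiB; the previous code stored the raw byte. A short worked
example (the raw byte value is illustrative only):

#include <stdio.h>

int main(void)
{
    /* SMBIOS Type 0 encodes the ROM size as (n + 1) * 64 KiB. */
    unsigned char n = 0x1f;                     /* example raw SMBIOS byte */
    unsigned int size_kb = (n + 1) << 6;        /* (31 + 1) * 64 = 2048 KiB */

    printf("BIOS ROM size: %u KiB (%u MiB)\n", size_kb, size_kb / 1024);
    return 0;
}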
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 73cec62504fb..09743103d9b3 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -278,116 +278,29 @@ void loongson3_cpu_die(unsigned int cpu)
mb();
}
-/*
- * The target CPU should go to XKPRANGE (uncached area) and flush
- * ICache/DCache/VCache before the control CPU can safely disable its clock.
- */
-static void loongson3_play_dead(int *state_addr)
+void play_dead(void)
{
- register int val;
- register void *addr;
+ register uint64_t addr;
register void (*init_fn)(void);
- __asm__ __volatile__(
- " li.d %[addr], 0x8000000000000000\n"
- "1: cacop 0x8, %[addr], 0 \n" /* flush ICache */
- " cacop 0x8, %[addr], 1 \n"
- " cacop 0x8, %[addr], 2 \n"
- " cacop 0x8, %[addr], 3 \n"
- " cacop 0x9, %[addr], 0 \n" /* flush DCache */
- " cacop 0x9, %[addr], 1 \n"
- " cacop 0x9, %[addr], 2 \n"
- " cacop 0x9, %[addr], 3 \n"
- " addi.w %[sets], %[sets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[sets], 1b \n"
- " li.d %[addr], 0x8000000000000000\n"
- "2: cacop 0xa, %[addr], 0 \n" /* flush VCache */
- " cacop 0xa, %[addr], 1 \n"
- " cacop 0xa, %[addr], 2 \n"
- " cacop 0xa, %[addr], 3 \n"
- " cacop 0xa, %[addr], 4 \n"
- " cacop 0xa, %[addr], 5 \n"
- " cacop 0xa, %[addr], 6 \n"
- " cacop 0xa, %[addr], 7 \n"
- " cacop 0xa, %[addr], 8 \n"
- " cacop 0xa, %[addr], 9 \n"
- " cacop 0xa, %[addr], 10 \n"
- " cacop 0xa, %[addr], 11 \n"
- " cacop 0xa, %[addr], 12 \n"
- " cacop 0xa, %[addr], 13 \n"
- " cacop 0xa, %[addr], 14 \n"
- " cacop 0xa, %[addr], 15 \n"
- " addi.w %[vsets], %[vsets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[vsets], 2b \n"
- " li.w %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
- " st.w %[val], %[state_addr], 0 \n"
- " dbar 0 \n"
- " cacop 0x11, %[state_addr], 0 \n" /* flush entry of *state_addr */
- : [addr] "=&r" (addr), [val] "=&r" (val)
- : [state_addr] "r" (state_addr),
- [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
- [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
-
+ idle_task_exit();
local_irq_enable();
- change_csr_ecfg(ECFG0_IM, ECFGF_IPI);
+ set_csr_ecfg(ECFGF_IPI);
+ __this_cpu_write(cpu_state, CPU_DEAD);
+
+ __smp_mb();
+ do {
+ __asm__ __volatile__("idle 0\n\t");
+ addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+ } while (addr == 0);
- __asm__ __volatile__(
- " idle 0 \n"
- " li.w $t0, 0x1020 \n"
- " iocsrrd.d %[init_fn], $t0 \n" /* Get init PC */
- : [init_fn] "=&r" (addr)
- : /* No Input */
- : "a0");
- init_fn = __va(addr);
+ init_fn = (void *)TO_CACHE(addr);
+ iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
init_fn();
unreachable();
}
-void play_dead(void)
-{
- int *state_addr;
- unsigned int cpu = smp_processor_id();
- void (*play_dead_uncached)(int *s);
-
- idle_task_exit();
- play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead));
- state_addr = &per_cpu(cpu_state, cpu);
- mb();
- play_dead_uncached(state_addr);
-}
-
-static int loongson3_enable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
-
- return 0;
-}
-
-static int loongson3_disable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
-
- return 0;
-}
-
-static int register_loongson3_notifier(void)
-{
- return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE,
- "loongarch/loongson:prepare",
- loongson3_enable_clock,
- loongson3_disable_clock);
-}
-early_initcall(register_loongson3_notifier);
-
#endif
/*
diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S
index 53e2fa8e580e..37e84ac8ffc2 100644
--- a/arch/loongarch/kernel/switch.S
+++ b/arch/loongarch/kernel/switch.S
@@ -24,8 +24,8 @@ SYM_FUNC_START(__switch_to)
move tp, a2
cpu_restore_nonscratch a1
- li.w t0, _THREAD_SIZE - 32
- PTR_ADD t0, t0, tp
+ li.w t0, _THREAD_SIZE - 32
+ PTR_ADD t0, t0, tp
set_saved_sp t0, t1, t2
ldptr.d t1, a1, THREAD_CSRPRMD
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 25d9be5fbb19..16ba2b8dd68a 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(__clear_user)
1: st.b zero, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, -1
- bgt a1, zero, 1b
+ bgtz a1, 1b
2: move a0, a1
jr ra
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 9ae507f851b5..97d20327a69e 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -35,7 +35,7 @@ SYM_FUNC_START(__copy_user)
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
- bgt a2, zero, 1b
+ bgtz a2, 1b
3: move a0, a2
jr ra
diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c
index 5d856694fcfe..831d4761f385 100644
--- a/arch/loongarch/lib/delay.c
+++ b/arch/loongarch/lib/delay.c
@@ -7,7 +7,6 @@
#include <linux/smp.h>
#include <linux/timex.h>
-#include <asm/compiler.h>
#include <asm/processor.h>
void __delay(unsigned long cycles)
diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S
index ddc78ab33c7b..4c874a7af0ad 100644
--- a/arch/loongarch/mm/page.S
+++ b/arch/loongarch/mm/page.S
@@ -10,75 +10,75 @@
.align 5
SYM_FUNC_START(clear_page)
- lu12i.w t0, 1 << (PAGE_SHIFT - 12)
- add.d t0, t0, a0
+ lu12i.w t0, 1 << (PAGE_SHIFT - 12)
+ add.d t0, t0, a0
1:
- st.d zero, a0, 0
- st.d zero, a0, 8
- st.d zero, a0, 16
- st.d zero, a0, 24
- st.d zero, a0, 32
- st.d zero, a0, 40
- st.d zero, a0, 48
- st.d zero, a0, 56
- addi.d a0, a0, 128
- st.d zero, a0, -64
- st.d zero, a0, -56
- st.d zero, a0, -48
- st.d zero, a0, -40
- st.d zero, a0, -32
- st.d zero, a0, -24
- st.d zero, a0, -16
- st.d zero, a0, -8
- bne t0, a0, 1b
+ st.d zero, a0, 0
+ st.d zero, a0, 8
+ st.d zero, a0, 16
+ st.d zero, a0, 24
+ st.d zero, a0, 32
+ st.d zero, a0, 40
+ st.d zero, a0, 48
+ st.d zero, a0, 56
+ addi.d a0, a0, 128
+ st.d zero, a0, -64
+ st.d zero, a0, -56
+ st.d zero, a0, -48
+ st.d zero, a0, -40
+ st.d zero, a0, -32
+ st.d zero, a0, -24
+ st.d zero, a0, -16
+ st.d zero, a0, -8
+ bne t0, a0, 1b
- jirl $r0, ra, 0
+ jr ra
SYM_FUNC_END(clear_page)
EXPORT_SYMBOL(clear_page)
.align 5
SYM_FUNC_START(copy_page)
- lu12i.w t8, 1 << (PAGE_SHIFT - 12)
- add.d t8, t8, a0
+ lu12i.w t8, 1 << (PAGE_SHIFT - 12)
+ add.d t8, t8, a0
1:
- ld.d t0, a1, 0
- ld.d t1, a1, 8
- ld.d t2, a1, 16
- ld.d t3, a1, 24
- ld.d t4, a1, 32
- ld.d t5, a1, 40
- ld.d t6, a1, 48
- ld.d t7, a1, 56
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ ld.d t4, a1, 32
+ ld.d t5, a1, 40
+ ld.d t6, a1, 48
+ ld.d t7, a1, 56
- st.d t0, a0, 0
- st.d t1, a0, 8
- ld.d t0, a1, 64
- ld.d t1, a1, 72
- st.d t2, a0, 16
- st.d t3, a0, 24
- ld.d t2, a1, 80
- ld.d t3, a1, 88
- st.d t4, a0, 32
- st.d t5, a0, 40
- ld.d t4, a1, 96
- ld.d t5, a1, 104
- st.d t6, a0, 48
- st.d t7, a0, 56
- ld.d t6, a1, 112
- ld.d t7, a1, 120
- addi.d a0, a0, 128
- addi.d a1, a1, 128
+ st.d t0, a0, 0
+ st.d t1, a0, 8
+ ld.d t0, a1, 64
+ ld.d t1, a1, 72
+ st.d t2, a0, 16
+ st.d t3, a0, 24
+ ld.d t2, a1, 80
+ ld.d t3, a1, 88
+ st.d t4, a0, 32
+ st.d t5, a0, 40
+ ld.d t4, a1, 96
+ ld.d t5, a1, 104
+ st.d t6, a0, 48
+ st.d t7, a0, 56
+ ld.d t6, a1, 112
+ ld.d t7, a1, 120
+ addi.d a0, a0, 128
+ addi.d a1, a1, 128
- st.d t0, a0, -64
- st.d t1, a0, -56
- st.d t2, a0, -48
- st.d t3, a0, -40
- st.d t4, a0, -32
- st.d t5, a0, -24
- st.d t6, a0, -16
- st.d t7, a0, -8
+ st.d t0, a0, -64
+ st.d t1, a0, -56
+ st.d t2, a0, -48
+ st.d t3, a0, -40
+ st.d t4, a0, -32
+ st.d t5, a0, -24
+ st.d t6, a0, -16
+ st.d t7, a0, -8
- bne t8, a0, 1b
- jirl $r0, ra, 0
+ bne t8, a0, 1b
+ jr ra
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 7eee40271577..de19fa2d7f0d 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -18,7 +18,7 @@
REG_S a2, sp, PT_BVADDR
li.w a1, \write
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(tlb_do_page_fault_\write)
.endm
@@ -34,7 +34,7 @@ SYM_FUNC_START(handle_tlb_protect)
csrrd a2, LOONGARCH_CSR_BADV
REG_S a2, sp, PT_BVADDR
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_tlb_protect)
@@ -47,7 +47,7 @@ SYM_FUNC_START(handle_tlb_load)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_load
+ bltz t0, vmalloc_load
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_load:
@@ -80,7 +80,7 @@ vmalloc_done_load:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_load
+ bnez t0, tlb_huge_update_load
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -100,12 +100,12 @@ smp_pgtable_change_load:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_load
+ beqz t0, smp_pgtable_change_load
#else
st.d t0, t1, 0
#endif
@@ -139,23 +139,23 @@ tlb_huge_update_load:
#endif
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
tlbsrch
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_load
+ beqz t0, tlb_huge_update_load
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
@@ -178,27 +178,27 @@ tlb_huge_update_load:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_load:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_0
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_load)
SYM_FUNC_START(handle_tlb_store)
@@ -210,7 +210,7 @@ SYM_FUNC_START(handle_tlb_store)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_store
+ bltz t0, vmalloc_store
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_store:
@@ -244,7 +244,7 @@ vmalloc_done_store:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_store
+ bnez t0, tlb_huge_update_store
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -265,12 +265,12 @@ smp_pgtable_change_store:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_store
+ beqz t0, smp_pgtable_change_store
#else
st.d t0, t1, 0
#endif
@@ -306,24 +306,24 @@ tlb_huge_update_store:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_store
+ beqz t0, tlb_huge_update_store
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -345,28 +345,28 @@ tlb_huge_update_store:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_store:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_store)
SYM_FUNC_START(handle_tlb_modify)
@@ -378,7 +378,7 @@ SYM_FUNC_START(handle_tlb_modify)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_modify
+ bltz t0, vmalloc_modify
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_modify:
@@ -411,7 +411,7 @@ vmalloc_done_modify:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_modify
+ bnez t0, tlb_huge_update_modify
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -431,12 +431,12 @@ smp_pgtable_change_modify:
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_modify
+ beqz t0, smp_pgtable_change_modify
#else
st.d t0, t1, 0
#endif
@@ -454,7 +454,7 @@ leave_modify:
ertn
#ifdef CONFIG_64BIT
vmalloc_modify:
- la.abs t1, swapper_pg_dir
+ la.abs t1, swapper_pg_dir
b vmalloc_done_modify
#endif
@@ -471,14 +471,14 @@ tlb_huge_update_modify:
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_modify
+ beqz t0, tlb_huge_update_modify
ld.d t0, t1, 0
#else
st.d t0, t1, 0
@@ -504,28 +504,28 @@ tlb_huge_update_modify:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbwr
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_modify:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_modify)
SYM_FUNC_START(handle_tlb_refill)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7aa12e88c580..4d8f26c1399b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -256,6 +256,7 @@ config PPC
select IRQ_FORCED_THREADING
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
@@ -281,6 +282,10 @@ config PPC
# Please keep this list sorted alphabetically.
#
+config PPC_LONG_DOUBLE_128
+ depends on PPC64
+ def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
+
config PPC_BARRIER_NOSPEC
bool
default y
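The new PPC_LONG_DOUBLE_128 symbol added above probes whether the compiler predefines
__LONG_DOUBLE_128__ by piping the macro through the preprocessor. The equivalent check,
expressed as a small standalone C program rather than Kconfig shell plumbing, would be:

#include <stdio.h>

int main(void)
{
#ifdef __LONG_DOUBLE_128__
    printf("long double is 128-bit on this target\n");
#else
    printf("long double is not 128-bit on this target\n");
#endif
    printf("sizeof(long double) = %zu\n", sizeof(long double));
    return 0;
}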
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 09a9ae5f3656..b3de6102a907 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -19,8 +19,6 @@
#include <linux/pagemap.h>
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
#define tlb_flush tlb_flush
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f91f0f29a566..c8cf924bf9c0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -20,6 +20,7 @@ CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += -fno-stack-protector
CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 34cf8a598617..81029d40a672 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
endif
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
+KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# GCC versions that support the "-mstrict-align" option default to allowing
# unaligned accesses. While unaligned accesses are explicitly allowed in the
@@ -110,7 +111,7 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
- $(build)=arch/riscv/kernel/compat_vdso $@)
+ $(build)=arch/riscv/kernel/compat_vdso compat_$@)
ifeq ($(KBUILD_EXTMOD),)
ifeq ($(CONFIG_MMU),y)
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
index 039b92abf046..f72540bd14a3 100644
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -35,7 +35,7 @@
gpio-keys {
compatible = "gpio-keys";
- key0 {
+ key {
label = "KEY0";
linux,code = <BTN_0>;
gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index b9e30df127fe..8abdbe26a1d0 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -47,7 +47,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index 8d23401b0bbb..3c6df1ecf76f 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -52,7 +52,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 24fd83b43d9d..03c9843d503e 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -46,19 +46,19 @@
gpio-keys {
compatible = "gpio-keys";
- up {
+ key-up {
label = "UP";
linux,code = <BTN_1>;
gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
};
- press {
+ key-press {
label = "PRESS";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
};
- down {
+ key-down {
label = "DOWN";
linux,code = <BTN_2>;
gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index 25341f38292a..7164ad063178 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -23,7 +23,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index c71d6591d539..33bb60a354cd 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -78,7 +78,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index 9cb85095fd45..0cb94992c15b 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -349,7 +349,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
{
const char *strtab, *name, *shstrtab;
const Elf_Shdr *sechdrs;
- Elf_Rela *relas;
+ Elf64_Rela *relas;
int i, r_type;
/* String & section header string table */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8cd9e56c629b..5a1a8dfda6f8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -204,6 +204,7 @@ config S390
select IOMMU_SUPPORT if PCI
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_SG_DMA_LENGTH if PCI
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index 2c6e1c6ecbe7..4120c428dc37 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -2,7 +2,7 @@
/*
* Kernel interface for the s390 arch_random_* functions
*
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -14,6 +14,7 @@
#ifdef CONFIG_ARCH_RANDOM
#include <linux/static_key.h>
+#include <linux/preempt.h>
#include <linux/atomic.h>
#include <asm/cpacf.h>
@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
atomic64_add(sizeof(*v), &s390_arch_random_counter);
return true;
@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
atomic64_add(sizeof(*v), &s390_arch_random_counter);
return true;
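For illustration only (not part of this patch): the guard added above follows a common pattern where a slow hardware entropy source is queried only from task context. A minimal kernel-style sketch, with example_read_trng() as a hypothetical stand-in for cpacf_trng():

#include <linux/preempt.h>
#include <linux/types.h>

/* Hypothetical backend, standing in for cpacf_trng() in this sketch. */
bool example_read_trng(u8 *buf, size_t len);

/*
 * Only query the (slow) hardware TRNG from task context; in interrupt
 * or softirq context report failure so callers fall back to their
 * normal entropy path.
 */
static inline bool example_get_hw_seed(unsigned long *v)
{
	if (!in_task())
		return false;
	return example_read_trng((u8 *)v, sizeof(*v));
}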
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f0eb1b..3a5c8fb590e5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
#define pmd_free_tlb pmd_free_tlb
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index ba449c47effd..4f7d1dfbc608 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -67,6 +67,8 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select MMU_GATHER_RCU_TABLE_FREE if SMP
+ select MMU_GATHER_MERGE_VMAS
+ select MMU_GATHER_NO_FLUSH_CACHE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index 779a5a0f0608..3037187482db 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm);
void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
void flush_tlb_pending(void);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
/*
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e58798f636d4..52a7f91527fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
@@ -2473,7 +2474,7 @@ config RETHUNK
bool "Enable return-thunks"
depends on RETPOLINE && CC_HAS_RETURN_THUNK
select OBJTOOL if HAVE_OBJTOOL
- default y
+ default y if X86_64
help
Compile the kernel with the return-thunks compiler option to guard
against kernel-to-user data leaks by avoiding return speculation.
@@ -2482,21 +2483,21 @@ config RETHUNK
config CPU_UNRET_ENTRY
bool "Enable UNRET on kernel entry"
- depends on CPU_SUP_AMD && RETHUNK
+ depends on CPU_SUP_AMD && RETHUNK && X86_64
default y
help
Compile the kernel with support for the retbleed=unret mitigation.
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
- depends on CPU_SUP_AMD
+ depends on CPU_SUP_AMD && X86_64
default y
help
Compile the kernel with support for the retbleed=ibpb mitigation.
config CPU_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
- depends on CPU_SUP_INTEL
+ depends on CPU_SUP_INTEL && X86_64
default y
help
Compile the kernel with support for the spectre_v2=ibrs mitigation.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f40dad30d50..7854685c5f25 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,6 +27,7 @@ RETHUNK_CFLAGS := -mfunction-return=thunk-extern
RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
endif
+export RETHUNK_CFLAGS
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 13179f31fe10..4f70fb6c2c1e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -278,9 +278,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -288,7 +288,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && x86_pmu.lbr_has_tsx;
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void)
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
- fallthrough;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 00f5227c8459..5fe7f6c8a7a4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,7 +219,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
+#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -302,6 +302,7 @@
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 6b0f31fb53f7..503a577814b2 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -164,4 +164,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
/* prctl */
extern long fpu_xstate_prctl(int option, unsigned long arg2);
+extern void fpu_idle_fpregs(void);
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 29dd27b5a339..3a8fdf881313 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -13,6 +13,7 @@
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
+#define MWAIT_C1_SUBSTATE_MASK 0xf0
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 42c09bc8ae18..cba942006ffe 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -309,6 +309,8 @@ do { \
alternative_msr_write(MSR_IA32_SPEC_CTRL, \
spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 19514524f0f8..4a23e52fe0ee 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -72,7 +72,6 @@ static inline u64 lower_bits(u64 val, unsigned int bits)
struct real_mode_header;
enum stack_type;
-struct ghcb;
/* Early IDT entry points for #VC handler */
extern void vc_no_ghcb(void);
@@ -156,11 +155,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- bool set_ghcb_msr,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2);
+
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 45b18eb94fa1..35f709f619fb 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -295,6 +295,15 @@ static inline int enqcmds(void __iomem *dst, const void *src)
return 0;
}
+static inline void tile_release(void)
+{
+ /*
+ * Instruction opcode for TILERELEASE; supported in binutils
+ * version >= 2.36.
+ */
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0");
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..580636cdc257 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,9 +2,6 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4af5579c7ef7..cda3118f3b27 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d6858533e6e5..62f6b8b7c4a5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -555,7 +555,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
dest = addr + insn.length + insn.immediate.value;
if (__static_call_fixup(addr, op, dest) ||
- WARN_ON_ONCE(dest != &__x86_return_thunk))
+ WARN_ONCE(dest != &__x86_return_thunk,
+ "missing return thunk: %pS-%pS: %*ph",
+ addr, dest, 5, addr))
continue;
DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 190e0f763375..4266b64631a4 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,17 +19,23 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
+#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
+#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -41,8 +47,11 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
{}
};
@@ -61,12 +70,15 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{}
};
@@ -81,6 +93,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index aa34f908c39f..6761668100b9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -975,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
@@ -1415,6 +1416,8 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_IBRS:
setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
break;
case SPECTRE_V2_LFENCE:
@@ -1516,7 +1519,17 @@ static void __init spectre_v2_select_mitigation(void)
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ boot_cpu_has(X86_FEATURE_IBPB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+ if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
+ pr_info("Enabling Speculation Barrier for firmware calls\n");
+ }
+
+ } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fd5dead8371c..663f6e6dd288 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -682,9 +682,9 @@ static void init_intel(struct cpuinfo_x86 *c)
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
- if (!(l1 & (1<<11)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_BTS);
- if (!(l1 & (1<<12)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_PEBS);
}
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ffb3233..12cf2e7ca33c 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -33,6 +33,8 @@
#include "internal.h"
+static bool hw_injection_possible = true;
+
/*
* Collect all the MCi_XXX settings
*/
@@ -339,6 +341,8 @@ static int __set_inj(const char *buf)
for (i = 0; i < N_INJ_TYPES; i++) {
if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ if (i > SW_INJ && !hw_injection_possible)
+ continue;
inj_type = i;
return 0;
}
@@ -717,11 +721,54 @@ static void __init debugfs_init(void)
&i_mce, dfs_fls[i].fops);
}
+static void check_hw_inj_possible(void)
+{
+ int cpu;
+ u8 bank;
+
+ /*
+ * This behavior exists only on SMCA systems though it's not directly
+ * related to SMCA.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_SMCA))
+ return;
+
+ cpu = get_cpu();
+
+ for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
+ u64 status = MCI_STATUS_VAL, ipid;
+
+ /* Check whether bank is populated */
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ if (!ipid)
+ continue;
+
+ toggle_hw_mce_inject(cpu, true);
+
+ wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+
+ if (!status) {
+ hw_injection_possible = false;
+ pr_warn("Platform does not allow *hardware* error injection."
+ "Try using APEI EINJ instead.\n");
+ }
+
+ toggle_hw_mce_inject(cpu, false);
+
+ break;
+ }
+
+ put_cpu();
+}
+
static int __init inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
+ check_hw_inj_possible();
+
debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 4ae0e603f7fa..7e03f5b7f6bd 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- if (mce_flags.smca) {
+ if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
switch (reg) {
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index c04b933f48d3..02039ec3597d 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -476,8 +476,8 @@ static bool __init vmware_legacy_x2apic_available(void)
{
uint32_t eax, ebx, ecx, edx;
VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
- return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
- (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
+ return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
+ (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}
#ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0531d6a06df5..3b28c5b25e12 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -851,3 +851,17 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
*/
return 0;
}
+
+/*
+ * Initialize register state that may prevent from entering low-power idle.
+ * This function will be invoked from the cpuidle driver only when needed.
+ */
+void fpu_idle_fpregs(void)
+{
+ /* Note: AMX_TILE being enabled implies XGETBV1 support */
+ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&
+ (xfeatures_in_use() & XFEATURE_MASK_XTILE)) {
+ tile_release();
+ fpregs_deactivate(&current->thread.fpu);
+ }
+}
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 6b07faaa1579..23154d24b117 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -27,6 +27,11 @@ static __init int register_e820_pmem(void)
* simply here to trigger the module to load on demand.
*/
pdev = platform_device_alloc("e820_pmem", -1);
- return platform_device_add(pdev);
+
+ rc = platform_device_add(pdev);
+ if (rc)
+ platform_device_put(pdev);
+
+ return rc;
}
device_initcall(register_e820_pmem);
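The leak fixed above follows the usual platform_device_alloc()/platform_device_add() ownership rule: if _add() fails, the caller still holds the only reference and must drop it with platform_device_put(). A hedged kernel-style sketch of that pattern, using a hypothetical device name:

#include <linux/platform_device.h>

static int register_example_device(void)
{
	struct platform_device *pdev;
	int rc;

	pdev = platform_device_alloc("example-dev", -1);	/* hypothetical name */
	if (!pdev)
		return -ENOMEM;

	rc = platform_device_add(pdev);
	if (rc)
		platform_device_put(pdev);	/* undo the alloc on failure */

	return rc;
}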
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d456ce21c255..58a6ea472db9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -810,24 +810,43 @@ static void amd_e400_idle(void)
}
/*
- * Intel Core2 and older machines prefer MWAIT over HALT for C1.
- * We can't rely on cpuidle installing MWAIT, because it will not load
- * on systems that support only C1 -- so the boot default must be MWAIT.
+ * Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
+ * exists and whenever MONITOR/MWAIT extensions are present there is at
+ * least one C1 substate.
*
- * Some AMD machines are the opposite, they depend on using HALT.
- *
- * So for default C1, which is used during boot until cpuidle loads,
- * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ * Do not prefer MWAIT if the MONITOR instruction has a bug or if
+ * idle=nomwait is passed on the kernel command line.
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ u32 eax, ebx, ecx, edx;
+
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
+ /* MWAIT is not supported on this platform. Fallback to HALT */
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
return 0;
- return 1;
+ /* Monitor has a bug. Fallback to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR))
+ return 0;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ /*
+ * If MWAIT extensions are not available, it is safe to use MWAIT
+ * with EAX=0, ECX=0.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED))
+ return 1;
+
+ /*
+ * If MWAIT extensions are available, there should be at least one
+ * MWAIT C1 substate present.
+ */
+ return (edx & MWAIT_C1_SUBSTATE_MASK);
}
/*
@@ -932,9 +951,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
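As a rough user-space sketch of the CPUID checks performed by prefer_mwait_c1_over_halt() above (leaf 5 is the MONITOR/MWAIT leaf): the constants mirror the kernel's CPUID5_ECX_EXTENSIONS_SUPPORTED and MWAIT_C1_SUBSTATE_MASK, and the program only demonstrates the decision logic, not the kernel's idle selection.

#include <cpuid.h>
#include <stdio.h>

#define CPUID_MWAIT_LEAF		5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1
#define MWAIT_C1_SUBSTATE_MASK		0xf0

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 5 not available */

	/* Without MWAIT extensions, MWAIT with EAX=0, ECX=0 is still usable. */
	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		puts("prefer MWAIT (no extensions advertised)");
		return 0;
	}

	/* With extensions, require at least one C1 substate (EDX bits 7:4). */
	printf("%s\n", (edx & MWAIT_C1_SUBSTATE_MASK) ?
	       "prefer MWAIT" : "fall back to HALT");
	return 0;
}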
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index b478edf43bec..3a5b0c9c4fcc 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -219,9 +219,10 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
return ES_VMM_ERROR;
}
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
- struct es_em_ctxt *ctxt, u64 exit_code,
- u64 exit_info_1, u64 exit_info_2)
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
ghcb->protocol_version = ghcb_version;
@@ -231,14 +232,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
- /*
- * Hyper-V unenlightened guests use a paravisor for communicating and
- * GHCB pages are being allocated and set up by that paravisor. Linux
- * should not change the GHCB page's physical address.
- */
- if (set_ghcb_msr)
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
+ sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
return verify_exception_info(ghcb, ctxt);
@@ -795,7 +789,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
*/
sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
ghcb_set_sw_scratch(ghcb, sw_scratch);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
exit_info_1, exit_info_2);
if (ret != ES_OK)
return ret;
@@ -837,8 +831,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rax(ghcb, rax);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
- SVM_EXIT_IOIO, exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
if (ret != ES_OK)
return ret;
@@ -894,7 +887,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
/* xgetbv will cause #GP - use reset value for xcr0 */
ghcb_set_xcr0(ghcb, 1);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
if (ret != ES_OK)
return ret;
@@ -919,7 +912,7 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
enum es_result ret;
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
if (ret != ES_OK)
return ret;
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c05f0124c410..63dc626627a0 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -786,7 +786,7 @@ static int vmgexit_psc(struct snp_psc_desc *desc)
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
* Page State Change VMGEXIT can pass error code through
@@ -1212,8 +1212,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rdx(ghcb, regs->dx);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
- exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
if ((ret == ES_OK) && (!exit_info_1)) {
regs->ax = ghcb->save.rax;
@@ -1452,7 +1451,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
/*
@@ -1628,7 +1627,7 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
/* Using a value of 0 for ExitInfo1 means RAX holds the value */
ghcb_set_rax(ghcb, val);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1658,7 +1657,7 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
@@ -1667,7 +1666,7 @@ static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt
ghcb_set_rcx(ghcb, ctxt->regs->cx);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1708,7 +1707,7 @@ static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
if (x86_platform.hyper.sev_es_hcall_prepare)
x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
if (ret != ES_OK)
return ret;
@@ -2197,7 +2196,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
ghcb_set_rbx(ghcb, input->data_npages);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 143e37298d8a..e5fa335a4ea7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6029,6 +6029,11 @@ split_irqchip_unlock:
r = 0;
break;
case KVM_CAP_X86_USER_SPACE_MSR:
+ r = -EINVAL;
+ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER))
+ break;
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
@@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
+ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 57ba5502aecf..82a042c03824 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -856,7 +856,7 @@ int devmem_is_allowed(unsigned long pagenr)
/*
* This must follow RAM test, since System RAM is considered a
- * restricted resource under CONFIG_STRICT_IOMEM.
+ * restricted resource under CONFIG_STRICT_DEVMEM.
*/
if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
/* Low 1MB bypasses iomem restrictions. */
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e44e938885b7..7418c367e328 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -110,7 +110,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
return vma_pkey(vma);
}
-#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
+#define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
/*
* Make the default PKRU value (at execve() time) as restrictive
@@ -118,11 +118,14 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
- PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
- PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
- PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
- PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
+u32 init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
+ PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
+ PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
+ PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
+ PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
+ PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
+ PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
+ PKRU_AD_MASK(15);
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
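Worked example (not part of the patch): with PKRU_AD_BIT == 0x1 and PKRU_BITS_PER_PKEY == 2, OR-ing the access-disable bit for pkeys 1..15 yields the default PKRU value 0x55555554, which this standalone sketch reproduces:

#include <stdio.h>

#define PKRU_AD_BIT		0x1u
#define PKRU_BITS_PER_PKEY	2
#define PKRU_AD_MASK(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))

int main(void)
{
	unsigned int pkru = 0;
	int pkey;

	/* Access-disable every key except pkey 0, as the execve() default does. */
	for (pkey = 1; pkey <= 15; pkey++)
		pkru |= PKRU_AD_MASK(pkey);

	printf("init_pkru_value = 0x%08x\n", pkru);	/* prints 0x55555554 */
	return 0;
}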
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d400b6d9d246..c1e31e9a85d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
+ u64 mm_tlb_gen;
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
return;
}
+ if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
+ f->new_tlb_gen <= local_tlb_gen)) {
+ /*
+ * The TLB is already up to date in respect to f->new_tlb_gen.
+ * While the core might be still behind mm_tlb_gen, checking
+ * mm_tlb_gen unnecessarily would have negative caching effects
+ * so avoid it.
+ */
+ return;
+ }
+
+ /*
+ * Defer mm_tlb_gen reading as long as possible to avoid cache
+ * contention.
+ */
+ mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+
if (unlikely(local_tlb_gen == mm_tlb_gen)) {
/*
* There's nothing to do: we're already up to date. This can
@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
/* Partial flush */
unsigned long addr = f->start;
+ /* Partial flush cannot have invalid generations */
+ VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
+
+ /* Partial flush must have valid mm */
+ VM_WARN_ON(f->mm == NULL);
+
nr_invalidate = (f->end - f->start) >> f->stride_shift;
while (addr < f->end) {
@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
struct flush_tlb_info *info;
preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+ info = get_flush_tlb_info(NULL, start, end, 0, false,
+ TLB_GENERATION_INVALID);
on_each_cpu(do_kernel_range_flush, info, 1);
@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
int cpu = get_cpu();
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
+ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+ TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
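A minimal sketch of the generation-compare idea used above, independent of the kernel's types: a flusher skips work when its target generation is already covered locally, and a sentinel generation (INVALID_GEN here, mirroring TLB_GENERATION_INVALID) forces the full check. All names are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define INVALID_GEN	0ULL	/* mirrors TLB_GENERATION_INVALID */

struct ctx {
	_Atomic uint64_t mm_gen;	/* latest generation requested for the mm */
	uint64_t local_gen;		/* generation this CPU has flushed up to */
};

/* Return true when a flush is actually needed for request 'new_gen'. */
static bool flush_needed(struct ctx *c, uint64_t new_gen)
{
	/*
	 * Cheap early exit: the request is already covered locally, so do
	 * not touch the shared mm_gen cache line at all.
	 */
	if (new_gen != INVALID_GEN && new_gen <= c->local_gen)
		return false;

	/* Only now read the (possibly contended) shared counter. */
	return atomic_load(&c->mm_gen) > c->local_gen;
}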
diff --git a/certs/Kconfig b/certs/Kconfig
index 476755703cf8..bf9b511573d7 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -43,6 +43,7 @@ config SYSTEM_TRUSTED_KEYRING
bool "Provide system-wide ring of trusted keys"
depends on KEYS
depends on ASYMMETRIC_KEY_TYPE
+ depends on X509_CERTIFICATE_PARSER
help
Provide a system keyring to which trusted keys can be added. Keys in
the keyring are considered to be trusted. Keys may be added at will
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 6ff1901d7d43..3c6d4ef87be0 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -782,7 +782,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
addr = ioremap(gas_t->address, gas_t->bit_width/8);
@@ -809,7 +810,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
}
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
} else {
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
diff --git a/drivers/clk/clk-lan966x.c b/drivers/clk/clk-lan966x.c
index d1535ac13e89..81cb90955d68 100644
--- a/drivers/clk/clk-lan966x.c
+++ b/drivers/clk/clk-lan966x.c
@@ -213,7 +213,7 @@ static int lan966x_gate_clk_register(struct device *dev,
hw_data->hws[i] =
devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name,
- "lan966x", 0, base,
+ "lan966x", 0, gate_base,
clk_gate_desc[idx].bit_idx,
0, &clk_gate_lock);
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
index 29a8c710ae06..b7962e5149a5 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
@@ -138,6 +138,7 @@ static struct ccu_common *sun50i_h6_r_ccu_clks[] = {
&r_apb2_rsb_clk.common,
&r_apb1_ir_clk.common,
&r_apb1_w1_clk.common,
+ &r_apb1_rtc_clk.common,
&ir_clk.common,
&w1_clk.common,
};
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 59b0bedc9c24..c8fa7dcfdbd0 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -103,9 +103,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
dmi_memdev_name(handle, &bank, &device);
- /* both strings must be non-zero */
- if (bank && *bank && device && *device)
- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
+ /*
+ * Set to a NULL string when both bank and device are zero. In this case,
+ * the label assigned by default will be preserved.
+ */
+ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
+ (bank && *bank) ? bank : "",
+ (bank && *bank && device && *device) ? " " : "",
+ (device && *device) ? device : "");
}
static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
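The three-part snprintf() above composes an empty label, just the bank, just the device, or "bank device" with a single separating space. A standalone sketch of that formatting (the sample strings are made up):

#include <stdio.h>

static void make_label(char *label, size_t len,
		       const char *bank, const char *device)
{
	/* Produces an empty string when both parts are missing. */
	snprintf(label, len, "%s%s%s",
		 (bank && *bank) ? bank : "",
		 (bank && *bank && device && *device) ? " " : "",
		 (device && *device) ? device : "");
}

int main(void)
{
	char label[64];

	make_label(label, sizeof(label), "P0 CHANNEL A", "DIMM 0");
	printf("'%s'\n", label);	/* 'P0 CHANNEL A DIMM 0' */
	make_label(label, sizeof(label), NULL, "DIMM 0");
	printf("'%s'\n", label);	/* 'DIMM 0' */
	make_label(label, sizeof(label), NULL, NULL);
	printf("'%s'\n", label);	/* '' */
	return 0;
}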
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 1cee64b80a7e..f7d37c282819 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -514,6 +514,28 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
memset(p, 0, sizeof(*p));
}
+static void enable_intr(struct synps_edac_priv *priv)
+{
+ /* Enable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(DDR_UE_MASK | DDR_CE_MASK,
+ priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+
+}
+
+static void disable_intr(struct synps_edac_priv *priv)
+{
+ /* Disable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(0x0, priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+}
+
/**
* intr_handler - Interrupt Handler for ECC interrupts.
* @irq: IRQ number.
@@ -555,6 +577,9 @@ static irqreturn_t intr_handler(int irq, void *dev_id)
/* v3.0 of the controller does not have this register */
if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
+ else
+ enable_intr(priv);
+
return IRQ_HANDLED;
}
@@ -837,25 +862,6 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev)
init_csrows(mci);
}
-static void enable_intr(struct synps_edac_priv *priv)
-{
- /* Enable UE/CE Interrupts */
- if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
- writel(DDR_UE_MASK | DDR_CE_MASK,
- priv->baseaddr + ECC_CLR_OFST);
- else
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
-
-}
-
-static void disable_intr(struct synps_edac_priv *priv)
-{
- /* Disable UE/CE Interrupts */
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
-}
-
static int setup_irq(struct mem_ctl_info *mci,
struct platform_device *pdev)
{
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 08bc52c3cdcb..ecd7d169470b 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = {
.reg_bits = 8,
.val_bits = 8,
+ .use_single_read = true,
+ .use_single_write = true,
+
.readable_reg = pca953x_readable_register,
.writeable_reg = pca953x_writeable_register,
.volatile_reg = pca953x_volatile_register,
@@ -906,15 +909,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
{
DECLARE_BITMAP(val, MAX_LINE);
+ u8 regaddr;
int ret;
- ret = regcache_sync_region(chip->regmap, chip->regs->output,
- chip->regs->output + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
- ret = regcache_sync_region(chip->regmap, chip->regs->direction,
- chip->regs->direction + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
@@ -1127,14 +1133,14 @@ static int pca953x_regcache_sync(struct device *dev)
* sync these registers first and only then sync the rest.
*/
regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret);
return ret;
}
regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret);
return ret;
@@ -1144,7 +1150,7 @@ static int pca953x_regcache_sync(struct device *dev)
if (chip->driver_data & PCA_PCAL) {
regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT latch registers: %d\n",
ret);
@@ -1153,7 +1159,7 @@ static int pca953x_regcache_sync(struct device *dev)
regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT mask registers: %d\n",
ret);
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index b6d3a57e27ed..7f8e2fed2988 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v)
const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5);
map[index] &= ~(0xFFFFFFFFul << offset);
- map[index] |= v << offset;
+ map[index] |= (unsigned long)v << offset;
}
static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch)
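The cast added above avoids a classic integer-promotion bug: shifting a 32-bit value by 32 is undefined, so the upper half of a 64-bit map word would never be written. A small standalone sketch (assumes an LP64 target where unsigned long is 64 bits):

#include <stdio.h>

int main(void)
{
	unsigned long map[1] = { 0 };
	unsigned int v = 0xABCD1234u;
	unsigned int offset = 32;	/* upper half of a 64-bit word */

	/*
	 * Buggy form: 'v << offset' is evaluated in 32 bits (undefined for
	 * offset == 32), so the upper word never gets written:
	 *   map[0] |= v << offset;
	 */

	/* Fixed form: widen first, then shift. */
	map[0] &= ~(0xFFFFFFFFul << offset);
	map[0] |= (unsigned long)v << offset;

	printf("map[0] = 0x%016lx\n", map[0]);
	return 0;
}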
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 0c9a63becfef..b26e64338376 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -421,6 +421,10 @@ out_free_lh:
* @work: the worker that implements software debouncing
* @sw_debounced: flag indicating if the software debouncer is active
* @level: the current debounced physical level of the line
+ * @hdesc: the Hardware Timestamp Engine (HTE) descriptor
+ * @raw_level: the line level at the time of event
+ * @total_discard_seq: the running counter of the discarded events
+ * @last_seqno: the last sequence number before debounce period expires
*/
struct line {
struct gpio_desc *desc;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6b6d46e29e6e..4608599ba6bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1364,16 +1364,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
- struct amdgpu_bo *pd = vm->root.bo;
if (!process_info)
return;
- /* Release eviction fence from PD */
- amdgpu_bo_reserve(pd, false);
- amdgpu_bo_fence(pd, NULL, false);
- amdgpu_bo_unreserve(pd);
-
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 714178f1b6c6..2168163aad2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
-
+ mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+ mutex_init(&list->bo_list_mutex);
*result = list;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 529d52a204cf..9caea1688fc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -47,6 +47,10 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b28af04b0c3e..d8f1335bc68f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
+ mutex_lock(&p->bo_list->bo_list_mutex);
+
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
@@ -651,6 +653,7 @@ out_free_user_pages:
kvfree(e->user_pages);
e->user_pages = NULL;
}
+ mutex_unlock(&p->bo_list->bo_list_mutex);
}
return r;
}
@@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ mutex_unlock(&parser->bo_list->bo_list_mutex);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
+ if (r) {
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
+ }
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
- if (r)
+ if (r) {
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
+ }
}
r = amdgpu_vm_handle_moved(adev, vm);
@@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0ba0598eba20..ec6771e87e73 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+ select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 93ac33a8de9a..3087dd1a1856 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1653,7 +1653,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
#endif
- if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
init_completion(&adev->dm.dmub_aux_transfer_done);
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
if (!adev->dm.dmub_notify) {
@@ -1689,6 +1689,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
goto error;
}
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point, for
+ * example HPD from DPIA.
+ */
+ if (dc_is_dmub_outbox_supported(adev->dm.dc))
+ dc_enable_dmub_outbox(adev->dm.dc);
+
/* create fake encoders for MST */
dm_dp_create_fake_mst_encoders(adev);
@@ -2678,9 +2685,6 @@ static int dm_resume(void *handle)
*/
link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
- if (dc_enable_dmub_notifications(adev->dm.dc))
- amdgpu_dm_outbox_init(adev);
-
r = dm_dmub_hw_init(adev);
if (r)
DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -2698,6 +2702,11 @@ static int dm_resume(void *handle)
}
}
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ amdgpu_dm_outbox_init(adev);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
+
WARN_ON(!dc_commit_state(dm->dc, dc_state));
dm_gpureset_commit_state(dm->cached_dc_state, dm);
@@ -2719,13 +2728,15 @@ static int dm_resume(void *handle)
/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
dc_resource_state_construct(dm->dc, dm_state->context);
- /* Re-enable outbox interrupts for DPIA. */
- if (dc_enable_dmub_notifications(adev->dm.dc))
- amdgpu_dm_outbox_init(adev);
-
/* Before powering on DC we need to re-initialize DMUB. */
dm_dmub_hw_resume(adev);
+ /* Re-enable outbox interrupts for DPIA. */
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ amdgpu_dm_outbox_init(adev);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
+
/* power on hardware */
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
index d5962a34c01d..e5fc875990c4 100644
--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem,
struct iosys_map *map)
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ int ret;
+
+ dma_resv_lock(gem->resv, NULL);
+ ret = ttm_bo_vmap(bo, map);
+ dma_resv_unlock(gem->resv);
- return ttm_bo_vmap(bo, map);
+ return ret;
}
EXPORT_SYMBOL(drm_gem_ttm_vmap);
@@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem,
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ dma_resv_lock(gem->resv, NULL);
ttm_bo_vunmap(bo, map);
+ dma_resv_unlock(gem->resv);
}
EXPORT_SYMBOL(drm_gem_ttm_vunmap);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 09f82545789f..44e7339e7a4a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -273,10 +273,17 @@ struct intel_context {
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
- /** @wqi_head: head pointer in work queue */
+ /** @wqi_head: cached head pointer in work queue */
u16 wqi_head;
- /** @wqi_tail: tail pointer in work queue */
+ /** @wqi_tail: cached tail pointer in work queue */
u16 wqi_tail;
+ /** @wq_head: pointer to the actual head in work queue */
+ u32 *wq_head;
+ /** @wq_tail: pointer to the actual tail in work queue */
+ u32 *wq_tail;
+ /** @wq_status: pointer to the status in work queue */
+ u32 *wq_status;
+
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine);
int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 14c6ddbbfde8..5b6ce10cb158 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1282,10 +1282,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
/*
- * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+ * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (GRAPHICS_VER(engine->i915) == 12)
+ if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
@@ -1308,6 +1308,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
ENGINE_TRACE(engine, "\n");
+ /*
+ * TODO: Find out why occasionally stopping the CS times out. Seen
+ * especially with gem_eio tests.
+ *
+ * Occasionally trying to stop the cs times out, but does not adversely
+ * affect functionality. The timeout is set as a config parameter that
+ * defaults to 100ms. In most cases the follow up operation is to wait
+ * for pending MI_FORCE_WAKES. The assumption is that this timeout is
+ * sufficient for any pending MI_FORCEWAKEs to complete. Once root
+ * caused, the caller must check and handle the return from this
+ * function.
+ */
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
"timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
@@ -1334,6 +1346,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+ static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+ [RCS0] = MSG_IDLE_CS,
+ [BCS0] = MSG_IDLE_BCS,
+ [VCS0] = MSG_IDLE_VCS0,
+ [VCS1] = MSG_IDLE_VCS1,
+ [VCS2] = MSG_IDLE_VCS2,
+ [VCS3] = MSG_IDLE_VCS3,
+ [VCS4] = MSG_IDLE_VCS4,
+ [VCS5] = MSG_IDLE_VCS5,
+ [VCS6] = MSG_IDLE_VCS6,
+ [VCS7] = MSG_IDLE_VCS7,
+ [VECS0] = MSG_IDLE_VECS0,
+ [VECS1] = MSG_IDLE_VECS1,
+ [VECS2] = MSG_IDLE_VECS2,
+ [VECS3] = MSG_IDLE_VECS3,
+ [CCS0] = MSG_IDLE_CS,
+ [CCS1] = MSG_IDLE_CS,
+ [CCS2] = MSG_IDLE_CS,
+ [CCS3] = MSG_IDLE_CS,
+ };
+ u32 val;
+
+ if (!_reg[engine->id].reg) {
+ drm_err(&engine->i915->drm,
+ "MSG IDLE undefined for engine id %u\n", engine->id);
+ return 0;
+ }
+
+ val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+ /* bits[29:25] & bits[13:9] >> shift */
+ return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+ int ret;
+
+ /* Ensure GPM receives fw up/down after CS is stopped */
+ udelay(1);
+
+ /* Wait for forcewake request to complete in GPM */
+ ret = __intel_wait_for_register_fw(gt->uncore,
+ GEN9_PWRGT_DOMAIN_STATUS,
+ fw_mask, fw_mask, 5000, 0, NULL);
+
+ /* Ensure CS receives fw ack from GPM */
+ udelay(1);
+
+ if (ret)
+ GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
+ * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
+ * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
+ * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
+ * are concerned only with the gt reset here, we use a logical OR of pending
+ * forcewakeups from all reset domains and then wait for them to complete by
+ * querying PWRGT_DOMAIN_STATUS.
+ */
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
+{
+ u32 fw_pending = __cs_pending_mi_force_wakes(engine);
+
+ if (fw_pending)
+ __gpm_wait_for_fw_complete(engine->gt, fw_pending);
+}
+
static u32
read_subslice_reg(const struct intel_engine_cs *engine,
int slice, int subslice, i915_reg_t reg)
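The expression (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT added above reads a register whose bits 29:25 act as per-bit valid flags for the value bits 13:9; ANDing the value with its own flags shifted down keeps only the bits that are actually asserted. A tiny standalone illustration, with the register layout assumed as described in that comment and the mask/shift values defined locally for the sketch:

#include <stdio.h>
#include <stdint.h>

#define MSG_IDLE_FW_MASK	0x3E00u		/* bits 13:9 */
#define MSG_IDLE_FW_SHIFT	9

int main(void)
{
	/*
	 * Pretend the register reports value bits 13:9 = 0b10110 while only
	 * bits 29:25 = 0b10010 are flagged as valid.
	 */
	uint32_t val = (0x16u << 9) | (0x12u << 25);
	uint32_t pending = (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;

	printf("pending forcewake bits = 0x%x\n", pending);	/* prints 0x12 */
	return 0;
}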
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 86f7a9ac1c39..0627fa10d2dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
+static u32 map_i915_prio_to_lrc_desc_prio(int prio)
+{
+ if (prio > I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_HIGH;
+ else if (prio < I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_LOW;
+ else
+ return GEN12_CTX_PRIORITY_NORMAL;
+}
+
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
@@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq)
desc = ce->lrc.desc;
if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
- desc |= lrc_desc_priority(rq_prio(rq));
+ desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));
/*
* WaIdleLiteRestore:bdw,skl
@@ -2958,6 +2968,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
+ /*
+ * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+ if (IS_GRAPHICS_VER(engine->i915, 11, 12))
+ intel_engine_wait_for_pending_mi_fw(engine);
+
engine->execlists.reset_ccid = active_ccid(engine);
}
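
map_i915_prio_to_lrc_desc_prio() above folds the request priority into the context descriptor only on engines flagged I915_ENGINE_HAS_EU_PRIORITY. A rough standalone illustration, assuming the GEN12 priority hint is a small field ORed into the descriptor (the real GEN12_CTX_PRIORITY_* defines live in intel_lrc.h):

#include <stdint.h>
#include <stdio.h>

#define I915_PRIORITY_NORMAL		0
/* assumed layout: a 2-bit priority hint at bits [10:9] of the descriptor */
#define GEN12_CTX_PRIORITY_LOW		(0ull << 9)
#define GEN12_CTX_PRIORITY_NORMAL	(1ull << 9)
#define GEN12_CTX_PRIORITY_HIGH		(2ull << 9)

static uint64_t map_prio(int prio)
{
	if (prio > I915_PRIORITY_NORMAL)
		return GEN12_CTX_PRIORITY_HIGH;
	else if (prio < I915_PRIORITY_NORMAL)
		return GEN12_CTX_PRIORITY_LOW;
	else
		return GEN12_CTX_PRIORITY_NORMAL;
}

int main(void)
{
	uint64_t desc = 0x1000;	/* hypothetical base descriptor */

	printf("high: %#llx\n", (unsigned long long)(desc | map_prio(2)));
	printf("low:  %#llx\n", (unsigned long long)(desc | map_prio(-2)));
	return 0;
}
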
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 31be734010db..a390f0813c8b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -111,16 +111,6 @@ enum {
#define XEHP_SW_COUNTER_SHIFT 58
#define XEHP_SW_COUNTER_WIDTH 6
-static inline u32 lrc_desc_priority(int prio)
-{
- if (prio > I915_PRIORITY_NORMAL)
- return GEN12_CTX_PRIORITY_HIGH;
- else if (prio < I915_PRIORITY_NORMAL)
- return GEN12_CTX_PRIORITY_LOW;
- else
- return GEN12_CTX_PRIORITY_NORMAL;
-}
-
static inline void lrc_runtime_start(struct intel_context *ce)
{
struct intel_context_stats *stats = &ce->stats;
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 4ef9990ed7f8..29ef8afc8c2e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -122,6 +122,9 @@ enum intel_guc_action {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
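
The three V69 action ids above fill the 0x1005-0x1007 gap; later hunks in this patch use them as the legacy (pre-v70) per-context policy messages (e.g. in __guc_context_set_prio() and __guc_context_set_preemption_timeout()), replacing the v70 KLV-based HOST2GUC_UPDATE_CONTEXT_POLICIES path on older firmware.
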
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2c4ad4a65089..8c6885f43d1a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -310,8 +310,8 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
if (IS_DG2(gt->i915))
flags |= GUC_WA_DUAL_QUEUE;
- /* Wa_22011802037: graphics version 12 */
- if (GRAPHICS_VER(gt->i915) == 12)
+ /* Wa_22011802037: graphics version 11/12 */
+ if (IS_GRAPHICS_VER(gt->i915, 11, 12))
flags |= GUC_WA_PRE_PARSER;
/* Wa_16011777198:dg2 */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 966e69a8b1c1..9feda105f913 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -170,6 +170,11 @@ struct intel_guc {
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;
+ /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
+ struct i915_vma *lrc_desc_pool_v69;
+ /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
+ void *lrc_desc_pool_vaddr_v69;
+
/**
* @context_lookup: used to resolve intel_context from guc_id, if a
* context is present in this structure it is registered with the GuC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 42cb7a9a6199..89a7e5ec0614 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -203,6 +203,20 @@ struct guc_wq_item {
u32 fence_id;
} __packed;
+struct guc_process_desc_v69 {
+ u32 stage_id;
+ u64 db_base_addr;
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u64 wq_base_addr;
+ u32 wq_size_bytes;
+ u32 wq_status;
+ u32 engine_presence;
+ u32 priority;
+ u32 reserved[36];
+} __packed;
+
struct guc_sched_wq_desc {
u32 head;
u32 tail;
@@ -227,6 +241,37 @@ struct guc_ctxt_registration_info {
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 mapping between guc_lrc_desc and LRCs for the lifetime of the LRC
+ * is not required.
+ */
+struct guc_lrc_desc_v69 {
+ u32 hw_context_desc;
+ u32 slpm_perf_mode_hint; /* SLPC v1 only */
+ u32 slpm_freq_hint;
+ u32 engine_submit_mask; /* In logical space */
+ u8 engine_class;
+ u8 reserved0[3];
+ u32 priority;
+ u32 process_desc;
+ u32 wq_addr;
+ u32 wq_size;
+ u32 context_flags; /* CONTEXT_REGISTRATION_* */
+ /* Time for one workload to execute (in microseconds) */
+ u32 execution_quantum;
+ /* Time to wait for a preemption request to complete before issuing a
+ * reset (in microseconds).
+ */
+ u32 preemption_timeout;
+ u32 policy_flags; /* CONTEXT_POLICY_* */
+ u32 reserved1[19];
+} __packed;
+
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
u32 kl;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1726f0f19901..2d9f5f1c79d3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -414,12 +414,15 @@ struct sync_semaphore {
};
struct parent_scratch {
- struct guc_sched_wq_desc wq_desc;
+ union guc_descs {
+ struct guc_sched_wq_desc wq_desc;
+ struct guc_process_desc_v69 pdesc;
+ } descs;
struct sync_semaphore go;
struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
- u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+ u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
u32 wq[WQ_SIZE / sizeof(u32)];
@@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce)
LRC_STATE_OFFSET) / sizeof(u32)));
}
+static struct guc_process_desc_v69 *
+__get_process_desc_v69(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->descs.pdesc;
+}
+
static struct guc_sched_wq_desc *
-__get_wq_desc(struct intel_context *ce)
+__get_wq_desc_v70(struct intel_context *ce)
{
struct parent_scratch *ps = __get_parent_scratch(ce);
- return &ps->wq_desc;
+ return &ps->descs.wq_desc;
}
-static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
/*
* Check for space in work queue. Caching a value of head pointer in
@@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
#define AVAILABLE_SPACE \
CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
- ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
+ ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
if (wqi_size > AVAILABLE_SPACE)
return NULL;
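
get_wq_pointer() now refreshes its cached head from ce->parallel.guc.wq_head only when the cached view does not show enough space for the item, so the hot path avoids re-reading the shared descriptor. A standalone sketch of the CIRC_SPACE() arithmetic it leans on, with an assumed WQ_SIZE (the real define lives elsewhere in this file):

#include <stdio.h>

#define WQ_SIZE				0x1000u	/* assumed, power of two */
#define CIRC_CNT(head, tail, sz)	(((head) - (tail)) & ((sz) - 1))
#define CIRC_SPACE(head, tail, sz)	CIRC_CNT((tail), ((head) + 1), (sz))

int main(void)
{
	/* producer (driver) index and the GuC's consumer index, both in bytes */
	unsigned int wqi_tail = 0x0f80, wqi_head = 0x0040;

	printf("space: %u bytes\n", CIRC_SPACE(wqi_tail, wqi_head, WQ_SIZE));	/* 191 */
	return 0;
}
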
@@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
return ce;
}
+static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
+{
+ struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
+
+ if (!base)
+ return NULL;
+
+ GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
+
+ return &base[index];
+}
+
+static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
+{
+ u32 size;
+ int ret;
+
+ size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
+ GUC_MAX_CONTEXT_ID);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
+ (void **)&guc->lrc_desc_pool_vaddr_v69);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
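
For scale, the v69 pool above holds one descriptor per possible context id; with the __packed layout shown earlier the descriptor works out to 128 bytes. A back-of-the-envelope sketch, treating GUC_MAX_CONTEXT_ID as 64K purely for illustration:

#include <stdio.h>

#define PAGE_SIZE		4096u
#define PAGE_ALIGN(x)		(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define GUC_LRC_DESC_V69_SIZE	128u	/* sizeof(struct guc_lrc_desc_v69), per the layout above */
#define GUC_MAX_CONTEXT_ID	65536u	/* assumption, for illustration only */

int main(void)
{
	unsigned int size = PAGE_ALIGN(GUC_LRC_DESC_V69_SIZE * GUC_MAX_CONTEXT_ID);

	printf("pool size: %u bytes (%u MiB)\n", size, size >> 20);
	return 0;
}
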
+static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
+{
+ if (!guc->lrc_desc_pool_vaddr_v69)
+ return;
+
+ guc->lrc_desc_pool_vaddr_v69 = NULL;
+ i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
+}
+
static inline bool guc_submission_initialized(struct intel_guc *guc)
{
return guc->submission_initialized;
}
+static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
+{
+ struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
+
+ if (desc)
+ memset(desc, 0, sizeof(*desc));
+}
+
static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
return __get_context(guc, id);
@@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
if (unlikely(!guc_submission_initialized(guc)))
return;
+ _reset_lrc_desc_v69(guc, id);
+
/*
* xarray API doesn't have xa_erase_irqsave wrapper, so calling
* the lower level functions directly.
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
true, timeout);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
@@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(context_guc_id_invalid(ce));
if (context_policy_required(ce)) {
- err = guc_context_policy_init(ce, false);
+ err = guc_context_policy_init_v70(ce, false);
if (err)
return err;
}
@@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}
-static void write_wqi(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
@@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc,
ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
(WQ_SIZE - 1);
- WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
+ WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}
static int guc_wq_noop_append(struct intel_context *ce)
{
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
- u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
+ u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
if (!wqi)
@@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq)
{
struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_context *child;
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
unsigned int wqi_size = (ce->parallel.number_children + 4) *
sizeof(u32);
u32 *wqi;
@@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
return ret;
}
- wqi = get_wq_pointer(wq_desc, ce, wqi_size);
+ wqi = get_wq_pointer(ce, wqi_size);
if (!wqi)
return -EBUSY;
@@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
for_each_child(ce, child)
*wqi++ = child->ring->tail / sizeof(u64);
- write_wqi(wq_desc, ce, wqi_size);
+ write_wqi(ce, wqi_size);
return 0;
}
@@ -1527,87 +1578,18 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
lrc_update_regs(ce, engine, head);
}
-static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
-{
- static const i915_reg_t _reg[I915_NUM_ENGINES] = {
- [RCS0] = MSG_IDLE_CS,
- [BCS0] = MSG_IDLE_BCS,
- [VCS0] = MSG_IDLE_VCS0,
- [VCS1] = MSG_IDLE_VCS1,
- [VCS2] = MSG_IDLE_VCS2,
- [VCS3] = MSG_IDLE_VCS3,
- [VCS4] = MSG_IDLE_VCS4,
- [VCS5] = MSG_IDLE_VCS5,
- [VCS6] = MSG_IDLE_VCS6,
- [VCS7] = MSG_IDLE_VCS7,
- [VECS0] = MSG_IDLE_VECS0,
- [VECS1] = MSG_IDLE_VECS1,
- [VECS2] = MSG_IDLE_VECS2,
- [VECS3] = MSG_IDLE_VECS3,
- [CCS0] = MSG_IDLE_CS,
- [CCS1] = MSG_IDLE_CS,
- [CCS2] = MSG_IDLE_CS,
- [CCS3] = MSG_IDLE_CS,
- };
- u32 val;
-
- if (!_reg[engine->id].reg)
- return 0;
-
- val = intel_uncore_read(engine->uncore, _reg[engine->id]);
-
- /* bits[29:25] & bits[13:9] >> shift */
- return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
-}
-
-static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
-{
- int ret;
-
- /* Ensure GPM receives fw up/down after CS is stopped */
- udelay(1);
-
- /* Wait for forcewake request to complete in GPM */
- ret = __intel_wait_for_register_fw(gt->uncore,
- GEN9_PWRGT_DOMAIN_STATUS,
- fw_mask, fw_mask, 5000, 0, NULL);
-
- /* Ensure CS receives fw ack from GPM */
- udelay(1);
-
- if (ret)
- GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
-}
-
-/*
- * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
- * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
- * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the
- * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
- * are concerned only with the gt reset here, we use a logical OR of pending
- * forcewakeups from all reset domains and then wait for them to complete by
- * querying PWRGT_DOMAIN_STATUS.
- */
static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
- u32 fw_pending;
-
- if (GRAPHICS_VER(engine->i915) != 12)
+ if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
return;
- /*
- * Wa_22011802037
- * TODO: Occasionally trying to stop the cs times out, but does not
- * adversely affect functionality. The timeout is set as a config
- * parameter that defaults to 100ms. Assuming that this timeout is
- * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
- * timeout returned here until it is root caused.
- */
intel_engine_stop_cs(engine);
- fw_pending = __cs_pending_mi_force_wakes(engine);
- if (fw_pending)
- __gpm_wait_for_fw_complete(engine->gt, fw_pending);
+ /*
+ * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+ intel_engine_wait_for_pending_mi_fw(engine);
}
static void guc_reset_nop(struct intel_engine_cs *engine)
@@ -1868,20 +1850,34 @@ static void reset_fail_worker_func(struct work_struct *w);
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
if (guc->submission_initialized)
return 0;
+ if (guc->fw.major_ver_found < 70) {
+ ret = guc_lrc_desc_pool_create_v69(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
- if (!guc->submission_state.guc_ids_bitmap)
- return -ENOMEM;
+ if (!guc->submission_state.guc_ids_bitmap) {
+ ret = -ENOMEM;
+ goto destroy_pool;
+ }
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
guc->submission_initialized = true;
return 0;
+
+destroy_pool:
+ guc_lrc_desc_pool_destroy_v69(guc);
+
+ return ret;
}
void intel_guc_submission_fini(struct intel_guc *guc)
@@ -1890,6 +1886,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
return;
guc_flush_destroyed_contexts(guc);
+ guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
guc->submission_initialized = false;
@@ -2147,10 +2144,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int __guc_action_register_multi_lrc(struct intel_guc *guc,
- struct intel_context *ce,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc_v69);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
+}
+
+static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
+ struct intel_context *ce,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
struct intel_context *child;
u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
@@ -2190,9 +2211,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc,
return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
-static int __guc_action_register_context(struct intel_guc *guc,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_context_v69(struct intel_guc *guc,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_REGISTER_CONTEXT,
+ guc_id,
+ offset,
+ };
+
+ return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+ 0, loop);
+}
+
+static int __guc_action_register_context_v70(struct intel_guc *guc,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
u32 action[] = {
INTEL_GUC_ACTION_REGISTER_CONTEXT,
@@ -2213,24 +2249,52 @@ static int __guc_action_register_context(struct intel_guc *guc,
0, loop);
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info);
+static void prepare_context_registration_info_v69(struct intel_context *ce);
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info);
-static int register_context(struct intel_context *ce, bool loop)
+static int
+register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+ u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
+ ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
+
+ prepare_context_registration_info_v69(ce);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ return __guc_action_register_context_v69(guc, ce->guc_id.id,
+ offset, loop);
+}
+
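
Together with __guc_action_register_multi_lrc_v69() above, the v69 multi-LRC registration just hands the GuC a list of descriptor offsets, with the children assumed to sit directly after the parent in the pool. A worked example with illustrative numbers (128-byte descriptors, pool at GGTT offset 0x100000, parent guc_id 5, two children):

#include <stdio.h>

#define GUC_LRC_DESC_V69_SIZE	128u	/* sizeof(struct guc_lrc_desc_v69) */

int main(void)
{
	unsigned int pool = 0x100000, guc_id = 5, n_children = 2, i;
	unsigned int offset = pool + guc_id * GUC_LRC_DESC_V69_SIZE;

	printf("parent descriptor @ %#x\n", offset);			/* 0x100280 */
	for (i = 0; i < n_children; i++) {
		offset += GUC_LRC_DESC_V69_SIZE;
		printf("child %u descriptor @ %#x\n", i, offset);	/* 0x100300, 0x100380 */
	}
	return 0;
}
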
+static int
+register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
{
struct guc_ctxt_registration_info info;
+
+ prepare_context_registration_info_v70(ce, &info);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
+ else
+ return __guc_action_register_context_v70(guc, &info, loop);
+}
+
+static int register_context(struct intel_context *ce, bool loop)
+{
struct intel_guc *guc = ce_to_guc(ce);
int ret;
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- prepare_context_registration_info(ce, &info);
-
- if (intel_context_is_parent(ce))
- ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
+ if (guc->fw.major_ver_found >= 70)
+ ret = register_context_v70(guc, ce, loop);
else
- ret = __guc_action_register_context(guc, &info, loop);
+ ret = register_context_v69(guc, ce, loop);
+
if (likely(!ret)) {
unsigned long flags;
@@ -2238,7 +2302,8 @@ static int register_context(struct intel_context *ce, bool loop)
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- guc_context_policy_init(ce, loop);
+ if (guc->fw.major_ver_found >= 70)
+ guc_context_policy_init_v70(ce, loop);
}
return ret;
@@ -2335,7 +2400,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
0, loop);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop)
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2394,8 +2459,108 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
return ret;
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info)
+static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
+ struct guc_lrc_desc_v69 *desc)
+{
+ desc->policy_flags = 0;
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
+
+ /* NB: For both of these, zero means disabled. */
+ desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+ desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+}
+
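
guc_context_policy_init_v69() converts the engine properties from milliseconds to the microseconds the v69 descriptor expects. A quick units check, using 1 ms timeslice and 640 ms preemption timeout as assumed inputs (the usual Kconfig defaults):

#include <stdio.h>

int main(void)
{
	/* assumed inputs; the real values come from engine->props */
	unsigned int timeslice_duration_ms = 1, preempt_timeout_ms = 640;

	printf("execution_quantum  = %u us\n", timeslice_duration_ms * 1000);
	printf("preemption_timeout = %u us\n", preempt_timeout_ms * 1000);
	return 0;
}
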
+static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
+{
+ /*
+ * This matches the mapping we do in map_i915_prio_to_guc_prio()
+ * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
+ */
+ switch (prio) {
+ default:
+ MISSING_CASE(prio);
+ fallthrough;
+ case GUC_CLIENT_PRIORITY_KMD_NORMAL:
+ return GEN12_CTX_PRIORITY_NORMAL;
+ case GUC_CLIENT_PRIORITY_NORMAL:
+ return GEN12_CTX_PRIORITY_LOW;
+ case GUC_CLIENT_PRIORITY_HIGH:
+ case GUC_CLIENT_PRIORITY_KMD_HIGH:
+ return GEN12_CTX_PRIORITY_HIGH;
+ }
+}
+
+static void prepare_context_registration_info_v69(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 ctx_id = ce->guc_id.id;
+ struct guc_lrc_desc_v69 *desc;
+ struct intel_context *child;
+
+ GEM_BUG_ON(!engine->mask);
+
+ /*
+ * Ensure LRC + CT vmas are in the same region as write barrier is done
+ * based on CT vma region.
+ */
+ GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+ i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+ desc = __get_lrc_desc_v69(guc, ctx_id);
+ desc->engine_class = engine_class_to_guc_class(engine->class);
+ desc->engine_submit_mask = engine->logical_mask;
+ desc->hw_context_desc = ce->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+
+ /*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc_v69 *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc_v69(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ ce->parallel.guc.wq_head = &pdesc->head;
+ ce->parallel.guc.wq_tail = &pdesc->tail;
+ ce->parallel.guc.wq_status = &pdesc->wq_status;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc_v69(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
+}
+
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2420,6 +2585,8 @@ static void prepare_context_registration_info(struct intel_context *ce,
*/
info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
+ if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
+ info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
/*
@@ -2443,10 +2610,14 @@ static void prepare_context_registration_info(struct intel_context *ce,
info->wq_base_hi = upper_32_bits(wq_base_offset);
info->wq_size = WQ_SIZE;
- wq_desc = __get_wq_desc(ce);
+ wq_desc = __get_wq_desc_v70(ce);
memset(wq_desc, 0, sizeof(*wq_desc));
wq_desc->wq_status = WQ_STATUS_ACTIVE;
+ ce->parallel.guc.wq_head = &wq_desc->head;
+ ce->parallel.guc.wq_tail = &wq_desc->tail;
+ ce->parallel.guc.wq_status = &wq_desc->wq_status;
+
clear_children_join_go_memory(ce);
}
}
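
Note that both registration paths end up publishing the same three pointers (wq_head, wq_tail and wq_status): the v69 path points them at the guc_process_desc_v69 fields, the v70 path at the guc_sched_wq_desc fields, which is what lets get_wq_pointer(), write_wqi() and the context-info printer below stay firmware-version agnostic.
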
@@ -2761,11 +2932,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
u16 guc_id,
u32 preemption_timeout)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, guc_id);
- __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, guc_id);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
+ guc_id,
+ preemption_timeout
+ };
+
+ intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
@@ -3013,11 +3194,21 @@ static int guc_context_alloc(struct intel_context *ce)
static void __guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, ce->guc_id.id);
- __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
+ ce->guc_id.id,
+ ce->guc_state.prio,
+ };
+
+ guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_set_prio(struct intel_guc *guc,
@@ -4527,17 +4718,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
guc_log_context_priority(p, ce);
if (intel_context_is_parent(ce)) {
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
struct intel_context *child;
drm_printf(p, "\t\tNumber children: %u\n",
ce->parallel.number_children);
- drm_printf(p, "\t\tWQI Head: %u\n",
- READ_ONCE(wq_desc->head));
- drm_printf(p, "\t\tWQI Tail: %u\n",
- READ_ONCE(wq_desc->tail));
- drm_printf(p, "\t\tWQI Status: %u\n\n",
- READ_ONCE(wq_desc->wq_status));
+
+ if (ce->parallel.guc.wq_status) {
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(*ce->parallel.guc.wq_status));
+ }
if (ce->engine->emit_bb_start ==
emit_bb_start_parent_no_preempt_mid_batch) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 2ff55b9994bc..703f42ba5ddd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1))
+#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3))
+
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
@@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
@@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
static const struct uc_fw_platform_requirement blobs_guc[] = {
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
};
+ static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
+ INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
@@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
+ uc_fw->wanted_path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
break;
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+ const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
+ u32 count = ARRAY_SIZE(blobs_guc_fallback);
+
+ for (i = 0; i < count && p <= blobs[i].p; i++) {
+ if (p == blobs[i].p && rev >= blobs[i].rev) {
+ const struct uc_fw_blob *blob = &blobs[i].blob;
+
+ uc_fw->fallback.path = blob->path;
+ uc_fw->fallback.major_ver = blob->major;
+ uc_fw->fallback.minor_ver = blob->minor;
+ break;
+ }
+ }
+ }
+
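
Assuming MAKE_GUC_FW_PATH keeps the usual i915/<name>_guc_<major>.<minor>.<patch>.bin naming, the fallback table above resolves to i915/adlp_guc_69.0.3.bin for Alder Lake-P and i915/tgl_guc_69.0.3.bin for Alder Lake-S; these are only requested when the preferred v70 blob cannot be fetched and no override path was given.
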
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
for (i = 1; i < fw_count; i++) {
@@ -338,7 +363,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
- err = request_firmware(&fw, uc_fw->path, dev);
+ err = firmware_request_nowarn(&fw, uc_fw->path, dev);
+ if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
+ err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
+ if (!err) {
+ drm_notice(&i915->drm,
+ "%s firmware %s is recommended, but only %s was found\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->wanted_path,
+ uc_fw->fallback.path);
+ drm_info(&i915->drm,
+ "Consider updating your linux-firmware pkg or downloading from %s\n",
+ INTEL_UC_FIRMWARE_URL);
+
+ uc_fw->path = uc_fw->fallback.path;
+ uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
+ uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
+ }
+ }
if (err)
goto fail;
@@ -437,8 +479,8 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
@@ -796,7 +838,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
{
drm_printf(p, "%s firmware: %s\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
+ if (uc_fw->fallback.path) {
+ drm_printf(p, "%s firmware fallback: %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
+ drm_printf(p, "fallback selected: %s\n",
+ str_yes_no(uc_fw->path == uc_fw->fallback.path));
+ }
drm_printf(p, "\tstatus: %s\n",
intel_uc_fw_status_repr(uc_fw->status));
drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 3229018877d3..562acdf88adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -74,6 +74,7 @@ struct intel_uc_fw {
const enum intel_uc_fw_status status;
enum intel_uc_fw_status __status; /* no accidental overwrites */
};
+ const char *wanted_path;
const char *path;
bool user_overridden;
size_t size;
@@ -98,6 +99,12 @@ struct intel_uc_fw {
u16 major_ver_found;
u16 minor_ver_found;
+ struct {
+ const char *path;
+ u16 major_ver;
+ u16 minor_ver;
+ } fallback;
+
u32 rsa_size;
u32 ucode_size;
diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c
index c849533ca83e..3f5750cc2673 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-dev.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c
@@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output)
ret = dcss_submodules_init(dcss);
if (ret) {
+ of_node_put(dcss->of_port);
dev_err(dev, "submodules initialization failed\n");
goto clks_err;
}
@@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss)
dcss_clocks_disable(dcss);
}
+ of_node_put(dcss->of_port);
+
pm_runtime_disable(dcss->dev);
dcss_submodules_stop(dcss);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 7ba66ad68a8a..16356611b5b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -680,7 +680,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
goto out_free_dma;
for (i = 0; i < npages; i += max) {
- args.end = start + (max << PAGE_SHIFT);
+ if (args.start + (max << PAGE_SHIFT) > end)
+ args.end = end;
+ else
+ args.end = args.start + (max << PAGE_SHIFT);
+
ret = migrate_vma_setup(&args);
if (ret)
goto out_free_pfns;
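
The change above clamps the last migration window so args.end never runs past the caller's end address. A standalone sketch of the chunking with that clamp, using illustrative addresses:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long start = 0x100000, end = 0x109000;	/* 9 pages, illustrative */
	unsigned long max = 4;				/* pages per window */
	unsigned long s, e;

	for (s = start; s < end; s = e) {
		e = s + (max << PAGE_SHIFT);
		if (e > end)
			e = end;	/* the clamp added above */
		printf("window [%#lx, %#lx)\n", s, e);
	}
	return 0;
}
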
diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index c96014464355..a189982601a4 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -713,7 +713,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel)
of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms);
desc->delay.hpd_reliable = reliable_ms;
of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms);
- desc->delay.hpd_reliable = absent_ms;
+ desc->delay.hpd_absent = absent_ms;
/* Power the panel on so we can read the EDID */
ret = pm_runtime_get_sync(dev);
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 191c56064f19..6b25b2f4f5a3 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
}
EXPORT_SYMBOL(drm_sched_entity_flush);
-static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
+static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
@@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
- init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
- irq_work_queue(&job->work);
+ INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
+ schedule_work(&job->work);
}
static struct dma_fence *
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 768242a78e2b..5422363690e7 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -627,7 +627,7 @@ static const struct drm_connector_funcs simpledrm_connector_funcs = {
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
-static int
+static enum drm_mode_status
simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
const struct drm_display_mode *mode)
{
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 4e239bd75b1d..5a9d47a229e4 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -428,6 +428,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->ccd_offset = 0x154;
k10temp_get_ccd_support(pdev, data, 8);
break;
+ case 0xa0 ... 0xaf:
+ data->ccd_offset = 0x300;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
}
} else if (boot_cpu_data.x86 == 0x19) {
data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
@@ -445,6 +449,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->ccd_offset = 0x300;
k10temp_get_ccd_support(pdev, data, 8);
break;
+ case 0x60 ... 0x6f:
+ case 0x70 ... 0x7f:
+ data->ccd_offset = 0x308;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
case 0x10 ... 0x1f:
case 0xa0 ... 0xaf:
data->ccd_offset = 0x300;
@@ -489,10 +498,13 @@ static const struct pci_device_id k10temp_id_table[] = {
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{ PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 3d6f8ee355bf..630cfa4ddd46 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr)
*/
static irqreturn_t cdns_i2c_master_isr(void *ptr)
{
- unsigned int isr_status, avail_bytes, updatetx;
+ unsigned int isr_status, avail_bytes;
unsigned int bytes_to_send;
- bool hold_quirk;
+ bool updatetx;
struct cdns_i2c *id = ptr;
/* Signal completion only after everything is updated */
int done_flag = 0;
@@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
* Check if transfer size register needs to be updated again for a
* large data receive operation.
*/
- updatetx = 0;
- if (id->recv_count > id->curr_recv_count)
- updatetx = 1;
-
- hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx;
+ updatetx = id->recv_count > id->curr_recv_count;
/* When receiving, handle data interrupt and completion interrupt */
if (id->p_recv_buf &&
@@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
break;
}
- if (cdns_is_holdquirk(id, hold_quirk))
+ if (cdns_is_holdquirk(id, updatetx))
break;
}
@@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
* maintain transfer size non-zero while performing a large
* receive operation.
*/
- if (cdns_is_holdquirk(id, hold_quirk)) {
+ if (cdns_is_holdquirk(id, updatetx)) {
/* wait while fifo is full */
while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) !=
(id->curr_recv_count - CDNS_I2C_FIFO_DEPTH))
@@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
CDNS_I2C_XFER_SIZE_OFFSET);
id->curr_recv_count = id->recv_count;
}
- } else if (id->recv_count && !hold_quirk &&
- !id->curr_recv_count) {
-
- /* Set the slave address in address register*/
- cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
- CDNS_I2C_ADDR_OFFSET);
-
- if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) {
- cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE;
- } else {
- cdns_i2c_writereg(id->recv_count,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = id->recv_count;
- }
}
/* Clear hold (if not repeated start) and signal completion */
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index e9e2db68b9fb..78fb1a4274a6 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -66,7 +66,7 @@
/* IMX I2C registers:
* the I2C register offset is different between SoCs,
- * to provid support for all these chips, split the
+ * to provide support for all these chips, split the
* register offset into a fixed base address and a
* variable shift value, then the full register offset
* will be calculated by
diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c
index 56aa424fd71d..815cc561386b 100644
--- a/drivers/i2c/busses/i2c-mlxcpld.c
+++ b/drivers/i2c/busses/i2c-mlxcpld.c
@@ -49,7 +49,7 @@
#define MLXCPLD_LPCI2C_NACK_IND 2
#define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04
-#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c
+#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e
#define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42
enum mlxcpld_i2c_frequency {
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f5c6802aa6c3..445b19d20b9a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -56,6 +56,7 @@
#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
+#include <asm/fpu/api.h>
#define INTEL_IDLE_VERSION "0.5.1"
@@ -114,6 +115,11 @@ static unsigned int mwait_substates __initdata;
#define CPUIDLE_FLAG_IBRS BIT(16)
/*
+ * Initialize large xstate for the C6-state entrance.
+ */
+#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
+
+/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -162,7 +168,13 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
raw_local_irq_enable();
ret = __intel_idle(dev, drv, index);
- raw_local_irq_disable();
+
+ /*
+ * The lockdep hardirqs state may be changed to 'on' with timer
+ * tick interrupt followed by __do_softirq(). Use local_irq_disable()
+ * to keep the hardirqs state correct.
+ */
+ local_irq_disable();
return ret;
}
@@ -185,6 +197,13 @@ static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
return ret;
}
+static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ fpu_idle_fpregs();
+ return __intel_idle(dev, drv, index);
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -200,8 +219,12 @@ static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- unsigned long eax = flg2MWAIT(drv->states[index].flags);
unsigned long ecx = 1; /* break on interrupt flag */
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
+ fpu_idle_fpregs();
mwait_idle_with_hints(eax, ecx);
@@ -936,7 +959,8 @@ static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_INIT_XSTATE,
.exit_latency = 290,
.target_residency = 800,
.enter = &intel_idle,
@@ -1851,6 +1875,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
drv->states[drv->state_count].enter = intel_idle_ibrs;
}
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
+ drv->states[drv->state_count].enter = intel_idle_xstate;
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 638bf4a1ed94..646fa8677490 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -4231,10 +4231,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
struct irdma_cm_node *cm_node;
struct list_head teardown_list;
struct ib_qp_attr attr;
- struct irdma_sc_vsi *vsi = &iwdev->vsi;
- struct irdma_sc_qp *sc_qp;
- struct irdma_qp *qp;
- int i;
INIT_LIST_HEAD(&teardown_list);
@@ -4251,52 +4247,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
irdma_cm_disconn(cm_node->iwqp);
irdma_rem_ref_cm_node(cm_node);
}
- if (!iwdev->roce_mode)
- return;
-
- INIT_LIST_HEAD(&teardown_list);
- for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
- mutex_lock(&vsi->qos[i].qos_mutex);
- list_for_each_safe (list_node, list_core_temp,
- &vsi->qos[i].qplist) {
- u32 qp_ip[4];
-
- sc_qp = container_of(list_node, struct irdma_sc_qp,
- list);
- if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
- continue;
-
- qp = sc_qp->qp_uk.back_qp;
- if (!disconnect_all) {
- if (nfo->ipv4)
- qp_ip[0] = qp->udp_info.local_ipaddr[3];
- else
- memcpy(qp_ip,
- &qp->udp_info.local_ipaddr[0],
- sizeof(qp_ip));
- }
-
- if (disconnect_all ||
- (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
- !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
- spin_lock(&iwdev->rf->qptable_lock);
- if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
- irdma_qp_add_ref(&qp->ibqp);
- list_add(&qp->teardown_entry,
- &teardown_list);
- }
- spin_unlock(&iwdev->rf->qptable_lock);
- }
- }
- mutex_unlock(&vsi->qos[i].qos_mutex);
- }
-
- list_for_each_safe (list_node, list_core_temp, &teardown_list) {
- qp = container_of(list_node, struct irdma_qp, teardown_entry);
- attr.qp_state = IB_QPS_ERR;
- irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
- irdma_qp_rem_ref(&qp->ibqp);
- }
}
/**
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
index e46fc110004d..50299f58b6b3 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -201,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
index cf53b17510cd..5986fd906308 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
index 46c12334c735..4789e85d717b 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -127,6 +127,7 @@ struct irdma_hw_attrs {
u64 max_hw_outbound_msg_size;
u64 max_hw_inbound_msg_size;
u64 max_mr_size;
+ u64 page_size_cap;
u32 min_hw_qp_id;
u32 min_hw_aeq_size;
u32 max_hw_aeq_size;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index c4412ece5a6d..96135a228f26 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -32,7 +32,7 @@ static int irdma_query_device(struct ib_device *ibdev,
props->vendor_part_id = pcidev->device;
props->hw_ver = rf->pcidev->revision;
- props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ props->page_size_cap = hw_attrs->page_size_cap;
props->max_mr_size = hw_attrs->max_mr_size;
props->max_qp = rf->max_qp - rf->used_qps;
props->max_qp_wr = hw_attrs->max_qp_wr;
@@ -2781,7 +2781,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
iwmr->page_size = ib_umem_find_best_pgsz(region,
- SZ_4K | SZ_2M | SZ_1G,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
virt);
if (unlikely(!iwmr->page_size)) {
kfree(iwmr);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20e53b167f81..c8539d0e12dd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7304,7 +7304,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+ ret = -ENOMEM;
+ conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!conf->stripe_hashtbl)
goto abort;
/* We init hash_locks[0] separately to that it can be used
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 2e0aa74ac185..95ef971b5e1c 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -13,10 +13,13 @@ lkdtm-$(CONFIG_LKDTM) += cfi.o
lkdtm-$(CONFIG_LKDTM) += fortify.o
lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o
-KASAN_SANITIZE_rodata.o := n
KASAN_SANITIZE_stackleak.o := n
-KCOV_INSTRUMENT_rodata.o := n
-CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
+
+KASAN_SANITIZE_rodata.o := n
+KCSAN_SANITIZE_rodata.o := n
+KCOV_INSTRUMENT_rodata.o := n
+OBJECT_FILES_NON_STANDARD_rodata.o := y
+CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS)
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 86e867ffbb10..033be559a730 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1298,8 +1298,9 @@ static int sdhci_omap_probe(struct platform_device *pdev)
/*
* omap_device_pm_domain has callbacks to enable the main
* functional clock, interface clock and also configure the
- * SYSCONFIG register of omap devices. The callback will be invoked
- * as part of pm_runtime_get_sync.
+ * SYSCONFIG register to clear any boot loader set voltage
+ * capabilities before calling sdhci_setup_host(). The
+ * callback will be invoked as part of pm_runtime_get_sync.
*/
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, 50);
@@ -1441,7 +1442,8 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
- sdhci_runtime_suspend_host(host);
+ if (omap_host->con != -EINVAL)
+ sdhci_runtime_suspend_host(host);
sdhci_omap_context_save(omap_host);
@@ -1458,10 +1460,10 @@ static int __maybe_unused sdhci_omap_runtime_resume(struct device *dev)
pinctrl_pm_select_default_state(dev);
- if (omap_host->con != -EINVAL)
+ if (omap_host->con != -EINVAL) {
sdhci_omap_context_restore(omap_host);
-
- sdhci_runtime_resume_host(host, 0);
+ sdhci_runtime_resume_host(host, 0);
+ }
return 0;
}
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 889e40329956..93da23682d86 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -850,9 +850,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
unsigned int tRP_ps;
bool use_half_period;
int sample_delay_ps, sample_delay_factor;
- u16 busy_timeout_cycles;
+ unsigned int busy_timeout_cycles;
u8 wrn_dly_sel;
unsigned long clk_rate, min_rate;
+ u64 busy_timeout_ps;
if (sdr->tRC_min >= 30000) {
/* ONFI non-EDO modes [0-3] */
@@ -885,7 +886,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps);
data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps);
data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps);
- busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps);
+ busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max);
+ busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps);
hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index be2719a3ba70..e019526e1df6 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
ihv3->nsrcs = 0;
ihv3->resv = 0;
ihv3->suppress = false;
- ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
ihv3->csum = 0;
csum = &ihv3->csum;
csum_start = (void *)ihv3;
@@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
return skb;
}
-static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+ bool validate)
{
if (validate && amt->status >= status)
return;
netdev_dbg(amt->dev, "Update GW status %s -> %s",
status_str[amt->status], status_str[status]);
- amt->status = status;
+ WRITE_ONCE(amt->status, status);
}
static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
@@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
tunnel->status = status;
}
-static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
-{
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, status, validate);
- spin_unlock_bh(&amt->lock);
-}
-
static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
enum amt_status status, bool validate)
{
@@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt)
if (unlikely(net_xmit_eval(err)))
amt->dev->stats.tx_errors++;
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
- spin_unlock_bh(&amt->lock);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
out:
rcu_read_unlock();
}
@@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
}
#endif
+static bool amt_queue_event(struct amt_dev *amt, enum amt_event event,
+ struct sk_buff *skb)
+{
+ int index;
+
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events >= AMT_MAX_EVENTS) {
+ spin_unlock_bh(&amt->lock);
+ return 1;
+ }
+
+ index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
+ amt->events[index].event = event;
+ amt->events[index].skb = skb;
+ amt->nr_events++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ queue_work(amt_wq, &amt->event_wq);
+ spin_unlock_bh(&amt->lock);
+
+ return 0;
+}
+
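
amt_queue_event() above is the producer side of a small, lock-protected bounded ring: a slot is claimed at (event_idx + nr_events) % AMT_MAX_EVENTS and a worker is queued to drain entries starting at event_idx. A self-contained sketch of the same index arithmetic (names and the dequeue side are illustrative, not the driver's actual consumer):

#include <stdbool.h>
#include <stdio.h>

#define MAX_EVENTS 16	/* stands in for AMT_MAX_EVENTS */

struct ring {
	int idx;		/* next slot to consume */
	int nr;			/* queued entries */
	int slots[MAX_EVENTS];
};

static bool enqueue(struct ring *r, int ev)
{
	if (r->nr >= MAX_EVENTS)
		return false;	/* full; the driver re-arms a delayed work instead */
	r->slots[(r->idx + r->nr) % MAX_EVENTS] = ev;
	r->nr++;
	return true;
}

static bool dequeue(struct ring *r, int *ev)
{
	if (!r->nr)
		return false;
	*ev = r->slots[r->idx];
	r->idx = (r->idx + 1) % MAX_EVENTS;
	r->nr--;
	return true;
}

int main(void)
{
	struct ring r = { 0 };
	int ev;

	enqueue(&r, 1);
	enqueue(&r, 2);
	while (dequeue(&r, &ev))
		printf("event %d\n", ev);
	return 0;
}
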
static void amt_secret_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
@@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work)
msecs_to_jiffies(AMT_SECRET_TIMEOUT));
}
-static void amt_discovery_work(struct work_struct *work)
+static void amt_event_send_discovery(struct amt_dev *amt)
{
- struct amt_dev *amt = container_of(to_delayed_work(work),
- struct amt_dev,
- discovery_wq);
-
- spin_lock_bh(&amt->lock);
if (amt->status > AMT_STATUS_SENT_DISCOVERY)
goto out;
get_random_bytes(&amt->nonce, sizeof(__be32));
- spin_unlock_bh(&amt->lock);
amt_send_discovery(amt);
- spin_lock_bh(&amt->lock);
out:
mod_delayed_work(amt_wq, &amt->discovery_wq,
msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
- spin_unlock_bh(&amt->lock);
}
-static void amt_req_work(struct work_struct *work)
+static void amt_discovery_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
struct amt_dev,
- req_wq);
+ discovery_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL))
+ mod_delayed_work(amt_wq, &amt->discovery_wq,
+ msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+}
+
+static void amt_event_send_request(struct amt_dev *amt)
+{
u32 exp;
- spin_lock_bh(&amt->lock);
if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
goto out;
if (amt->req_cnt > AMT_MAX_REQ_COUNT) {
netdev_dbg(amt->dev, "Gateway is not ready");
amt->qi = AMT_INIT_REQ_TIMEOUT;
- amt->ready4 = false;
- amt->ready6 = false;
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
amt->remote_ip = 0;
- __amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+ amt_update_gw_status(amt, AMT_STATUS_INIT, false);
amt->req_cnt = 0;
+ amt->nonce = 0;
goto out;
}
- spin_unlock_bh(&amt->lock);
+
+ if (!amt->req_cnt) {
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
+ get_random_bytes(&amt->nonce, sizeof(__be32));
+ }
amt_send_request(amt, false);
amt_send_request(amt, true);
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
amt->req_cnt++;
out:
exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
- spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(to_delayed_work(work),
+ struct amt_dev,
+ req_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL))
+ mod_delayed_work(amt_wq, &amt->req_wq,
+ msecs_to_jiffies(100));
}
static bool amt_send_membership_update(struct amt_dev *amt,
@@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
/* Gateway only passes IGMP/MLD packets */
if (!report)
goto free;
- if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
+ if ((!v6 && !READ_ONCE(amt->ready4)) ||
+ (v6 && !READ_ONCE(amt->ready6)))
goto free;
if (amt_send_membership_update(amt, skb, v6))
goto free;
@@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb)
ipv4_is_zeronet(amta->ip4))
return true;
+ if (amt->status != AMT_STATUS_SENT_DISCOVERY ||
+ amt->nonce != amta->nonce)
+ return true;
+
amt->remote_ip = amta->ip4;
netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip);
mod_delayed_work(amt_wq, &amt->req_wq, 0);
@@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb)
struct ethhdr *eth;
struct iphdr *iph;
+ if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE)
+ return true;
+
hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_size))
return true;
@@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
if (amtmq->reserved || amtmq->version)
return true;
+ if (amtmq->nonce != amt->nonce)
+ return true;
+
hdr_size -= sizeof(*eth);
if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false))
return true;
@@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
iph = ip_hdr(skb);
if (iph->version == 4) {
+ if (READ_ONCE(amt->ready4))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS +
sizeof(*ihv3)))
return true;
@@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready4 = true;
+ WRITE_ONCE(amt->ready4, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = ihv3->qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IP);
eth->h_proto = htons(ETH_P_IP);
ip_eth_mc_map(iph->daddr, eth->h_dest);
@@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
struct mld2_query *mld2q;
struct ipv6hdr *ip6h;
+ if (READ_ONCE(amt->ready6))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS +
sizeof(*mld2q)))
return true;
@@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready6 = true;
+ WRITE_ONCE(amt->ready6, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = mld2q->mld2q_qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IPV6);
eth->h_proto = htons(ETH_P_IPV6);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
@@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
skb->pkt_type = PACKET_MULTICAST;
skb->ip_summed = CHECKSUM_NONE;
len = skb->len;
+ local_bh_disable();
if (__netif_rx(skb) == NET_RX_SUCCESS) {
amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true);
dev_sw_netstats_rx_add(amt->dev, len);
} else {
amt->dev->stats.rx_dropped++;
}
+ local_bh_enable();
return false;
}
@@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
if (tunnel->ip4 == iph->saddr)
goto send;
+ spin_lock_bh(&amt->lock);
if (amt->nr_tunnels >= amt->max_tunnels) {
+ spin_unlock_bh(&amt->lock);
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
return true;
}
@@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
tunnel = kzalloc(sizeof(*tunnel) +
(sizeof(struct hlist_head) * amt->hash_buckets),
GFP_ATOMIC);
- if (!tunnel)
+ if (!tunnel) {
+ spin_unlock_bh(&amt->lock);
return true;
+ }
tunnel->source_port = udph->source;
tunnel->ip4 = iph->saddr;
@@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
- spin_lock_bh(&amt->lock);
list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
tunnel->key = amt->key;
- amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
+ __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
amt->nr_tunnels++;
mod_delayed_work(amt_wq, &tunnel->gc_wq,
msecs_to_jiffies(amt_gmi(amt)));
@@ -2688,6 +2732,38 @@ send:
return false;
}
+static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb)
+{
+ int type = amt_parse_type(skb);
+ int err = 1;
+
+ if (type == -1)
+ goto drop;
+
+ if (amt->mode == AMT_MODE_GATEWAY) {
+ switch (type) {
+ case AMT_MSG_ADVERTISEMENT:
+ err = amt_advertisement_handler(amt, skb);
+ break;
+ case AMT_MSG_MEMBERSHIP_QUERY:
+ err = amt_membership_query_handler(amt, skb);
+ if (!err)
+ return;
+ break;
+ default:
+ netdev_dbg(amt->dev, "Invalid type of Gateway\n");
+ break;
+ }
+ }
+drop:
+ if (err) {
+ amt->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ } else {
+ consume_skb(skb);
+ }
+}
+
static int amt_rcv(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_advertisement_handler(amt, skb);
- break;
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
+ goto drop;
+ }
+ goto out;
case AMT_MSG_MULTICAST_DATA:
if (iph->saddr != amt->remote_ip) {
netdev_dbg(amt->dev, "Invalid Relay IP\n");
@@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_membership_query_handler(amt, skb);
- if (err)
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
goto drop;
- else
- goto out;
+ }
+ goto out;
default:
err = true;
netdev_dbg(amt->dev, "Invalid type of Gateway\n");
@@ -2780,6 +2861,46 @@ out:
return 0;
}
+static void amt_event_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(work, struct amt_dev, event_wq);
+ struct sk_buff *skb;
+ u8 event;
+ int i;
+
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events == 0) {
+ spin_unlock_bh(&amt->lock);
+ return;
+ }
+ event = amt->events[amt->event_idx].event;
+ skb = amt->events[amt->event_idx].skb;
+ amt->events[amt->event_idx].event = AMT_EVENT_NONE;
+ amt->events[amt->event_idx].skb = NULL;
+ amt->nr_events--;
+ amt->event_idx++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ spin_unlock_bh(&amt->lock);
+
+ switch (event) {
+ case AMT_EVENT_RECEIVE:
+ amt_gw_rcv(amt, skb);
+ break;
+ case AMT_EVENT_SEND_DISCOVERY:
+ amt_event_send_discovery(amt);
+ break;
+ case AMT_EVENT_SEND_REQUEST:
+ amt_event_send_request(amt);
+ break;
+ default:
+ if (skb)
+ kfree_skb(skb);
+ break;
+ }
+ }
+}
+
static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
break;
case AMT_MSG_REQUEST:
case AMT_MSG_MEMBERSHIP_UPDATE:
- if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
+ if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
mod_delayed_work(amt_wq, &amt->req_wq, 0);
break;
default:
@@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev)
amt->ready4 = false;
amt->ready6 = false;
+ amt->event_idx = 0;
+ amt->nr_events = 0;
err = amt_socket_create(amt);
if (err)
@@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev)
amt->req_cnt = 0;
amt->remote_ip = 0;
+ amt->nonce = 0;
get_random_bytes(&amt->key, sizeof(siphash_key_t));
amt->status = AMT_STATUS_INIT;
@@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev)
struct amt_dev *amt = netdev_priv(dev);
struct amt_tunnel_list *tunnel, *tmp;
struct socket *sock;
+ struct sk_buff *skb;
+ int i;
cancel_delayed_work_sync(&amt->req_wq);
cancel_delayed_work_sync(&amt->discovery_wq);
@@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev)
if (sock)
udp_tunnel_sock_release(sock);
+ cancel_work_sync(&amt->event_wq);
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ skb = amt->events[i].skb;
+ if (skb)
+ kfree_skb(skb);
+ amt->events[i].event = AMT_EVENT_NONE;
+ amt->events[i].skb = NULL;
+ }
+
amt->ready4 = false;
amt->ready6 = false;
amt->req_cnt = 0;
@@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev,
goto err;
}
if (amt->mode == AMT_MODE_RELAY) {
- amt->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
amt->qri = 10;
dev->needed_headroom = amt->stream_dev->needed_headroom +
AMT_RELAY_HLEN;
@@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev,
INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work);
INIT_DELAYED_WORK(&amt->req_wq, amt_req_work);
INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work);
+ INIT_WORK(&amt->event_wq, amt_event_work);
INIT_LIST_HEAD(&amt->tunnel_list);
-
return 0;
err:
dev_put(amt->stream_dev);
@@ -3280,7 +3415,7 @@ static int __init amt_init(void)
if (err < 0)
goto unregister_notifier;
- amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1);
+ amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0);
if (!amt_wq) {
err = -ENOMEM;
goto rtnl_unregister;
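
The rework above funnels every gateway-side trigger (received control packets, the discovery timer, the request timer) through a small per-device ring: amt_queue_event() fills events[], amt_event_work() drains it, and all handlers now run in one workqueue context, which is why the fine-grained spin_lock_bh()/spin_unlock_bh() dances could be dropped from amt_event_send_discovery() and amt_event_send_request(). The enqueue helper itself is not part of the hunks shown; the following is only a sketch of how such a producer can look, reusing the field names visible in amt_event_work() (events, event_idx, nr_events, lock, event_wq, AMT_MAX_EVENTS), not necessarily the exact upstream implementation:

/* Hedged sketch of the producer side: returns non-zero when the ring is
 * full so callers (amt_rcv(), the delayed works) can drop or retry.
 */
static int amt_queue_event(struct amt_dev *amt, u8 event, struct sk_buff *skb)
{
        int index;

        spin_lock_bh(&amt->lock);
        if (amt->nr_events >= AMT_MAX_EVENTS) {
                spin_unlock_bh(&amt->lock);
                return 1;
        }

        /* Producer slot is event_idx + nr_events, consumer reads at
         * event_idx; both are protected by amt->lock.
         */
        index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
        amt->events[index].event = event;
        amt->events[index].skb = skb;
        amt->nr_events++;
        queue_work(amt_wq, &amt->event_wq);
        spin_unlock_bh(&amt->lock);

        return 0;
}

Because a single worker now owns these fields, the remaining datapath readers (amt_dev_xmit(), the data and query handlers) only need READ_ONCE()/WRITE_ONCE() annotations rather than the lock.
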
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index ba42cef10a53..cb0321ea853c 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
of_child = of_get_child_by_name(pdev->dev.of_node, name);
if (of_child && of_device_is_available(of_child))
channels_mask |= BIT(i);
+ of_node_put(of_child);
}
if (chip_id != RENESAS_RZG2L) {
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 9b47b07162fe..bc6518504fd4 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1690,8 +1690,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
u32 osc;
int err;
- /* The OSC_LPMEN is only supported on MCP2518FD, so use it to
- * autodetect the model.
+ /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863,
+ * so use it to autodetect the model.
*/
err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC,
MCP251XFD_REG_OSC_LPMEN,
@@ -1703,10 +1703,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
if (err)
return err;
- if (osc & MCP251XFD_REG_OSC_LPMEN)
- devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
- else
+ if (osc & MCP251XFD_REG_OSC_LPMEN) {
+ /* We cannot distinguish between MCP2518FD and
+ * MCP251863. If firmware specifies MCP251863, keep
+ * it, otherwise set to MCP2518FD.
+ */
+ if (mcp251xfd_is_251863(priv))
+ devtype_data = &mcp251xfd_devtype_data_mcp251863;
+ else
+ devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
+ } else {
devtype_data = &mcp251xfd_devtype_data_mcp2517fd;
+ }
if (!mcp251xfd_is_251XFD(priv) &&
priv->devtype_data.model != devtype_data->model) {
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 9ca8c8d7740f..92a500e1ccd2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1038,18 +1038,21 @@ int ksz_switch_register(struct ksz_device *dev,
ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
if (!ports)
ports = of_get_child_by_name(dev->dev->of_node, "ports");
- if (ports)
+ if (ports) {
for_each_available_child_of_node(ports, port) {
if (of_property_read_u32(port, "reg",
&port_num))
continue;
if (!(dev->port_mask & BIT(port_num))) {
of_node_put(port);
+ of_node_put(ports);
return -EINVAL;
}
of_get_phy_mode(port,
&dev->ports[port_num].interface);
}
+ of_node_put(ports);
+ }
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
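
Both this hunk and the rcar_canfd one above fix device-tree node leaks: of_get_child_by_name() returns the node with its refcount raised, and for_each_available_child_of_node() only drops the previous child's reference when it advances, so an early return must put the current child (and any separately looked-up parent) by hand. A sketch of the balanced pattern; port_is_sane() and the node names are illustrative only:

/* Count valid ports, dropping every OF reference on all exit paths. */
static int count_ports(struct device_node *np)
{
        struct device_node *ports, *port;
        int n = 0;

        ports = of_get_child_by_name(np, "ethernet-ports");
        if (!ports)
                return 0;

        for_each_available_child_of_node(ports, port) {
                if (!port_is_sane(port)) {      /* hypothetical validation */
                        of_node_put(port);      /* put the child on early exit */
                        n = -EINVAL;
                        break;
                }
                n++;
        }
        of_node_put(ports);                     /* always drop the parent */

        return n;
}
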
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 72b6fc1932b5..698c7d1fb45c 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
+static const struct spi_device_id sja1105_spi_ids[] = {
+ { "sja1105e" },
+ { "sja1105t" },
+ { "sja1105p" },
+ { "sja1105q" },
+ { "sja1105r" },
+ { "sja1105s" },
+ { "sja1110a" },
+ { "sja1110b" },
+ { "sja1110c" },
+ { "sja1110d" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
+
static struct spi_driver sja1105_driver = {
.driver = {
.name = "sja1105",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(sja1105_dt_ids),
},
+ .id_table = sja1105_spi_ids,
.probe = sja1105_probe,
.remove = sja1105_remove,
.shutdown = sja1105_shutdown,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 3110895358d8..97a92e6da60d 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = {
};
MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+static const struct spi_device_id vsc73xx_spi_ids[] = {
+ { "vsc7385" },
+ { "vsc7388" },
+ { "vsc7395" },
+ { "vsc7398" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
+
static struct spi_driver vsc73xx_spi_driver = {
.probe = vsc73xx_spi_probe,
.remove = vsc73xx_spi_remove,
.shutdown = vsc73xx_spi_shutdown,
+ .id_table = vsc73xx_spi_ids,
.driver = {
.name = "vsc73xx-spi",
.of_match_table = vsc73xx_of_match,
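
The sja1105 and vsc73xx changes add plain SPI device-ID tables next to the existing OF match tables. The SPI core can report a device's modalias as spi:<name> (the compatible string with the vendor prefix stripped), and without a matching spi_device_id table no such module alias is emitted, so automatic module loading can fail even though the OF table would match at probe time. A generic sketch of carrying both tables; "acme5105" and the callbacks are made-up names:

/* Skeleton SPI driver exposing both OF and spi:<name> aliases. */
static const struct of_device_id acme_of_match[] = {
        { .compatible = "acme,acme5105" },
        { }
};
MODULE_DEVICE_TABLE(of, acme_of_match);

static const struct spi_device_id acme_spi_ids[] = {
        { "acme5105" },
        { }
};
MODULE_DEVICE_TABLE(spi, acme_spi_ids);

static struct spi_driver acme_driver = {
        .probe          = acme_probe,           /* hypothetical callbacks */
        .remove         = acme_remove,
        .id_table       = acme_spi_ids,
        .driver = {
                .name           = "acme5105",
                .of_match_table = acme_of_match,
        },
};
module_spi_driver(acme_driver);
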
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 7c760aa65540..ddfe9208529a 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
csk->sndbuf = newsk->sk_sndbuf;
csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
- sock_net(newsk)->
- ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(newsk)->
+ ipv4.sysctl_tcp_window_scaling),
tp->window_clamp);
neigh_release(n);
inet_inherit_port(&tcp_hashinfo, lsk, newsk);
@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
#endif
}
if (req->tcpopt.wsf <= 14 &&
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
inet_rsk(oreq)->wscale_ok = 1;
inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
}
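
Both chtls hunks are part of a wider series annotating lockless reads of ipv4 sysctls: the values are updated from the sysctl handler without any lock the readers hold, so each read site is wrapped in READ_ONCE() (with WRITE_ONCE() on the store side elsewhere in the series) to keep the compiler from tearing or re-reading the value. A minimal sketch of the idiom, with made-up structure and field names:

/* Illustrative only: writer and reader may run concurrently without a lock. */
struct knobs {
        int window_scaling;
};

static void knobs_store(struct knobs *k, int val)
{
        WRITE_ONCE(k->window_scaling, val);             /* sysctl write path */
}

static bool knobs_scaling_enabled(const struct knobs *k)
{
        return READ_ONCE(k->window_scaling) != 0;       /* read once, use once */
}
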
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 528eb0f223b1..b4f5e57d0285 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2287,7 +2287,7 @@ err:
/* Uses sync mcc */
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data)
+ u8 page_num, u32 off, u32 len, u8 *data)
{
struct be_dma_mem cmd;
struct be_mcc_wrb *wrb;
@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
req->port = cpu_to_le32(adapter->hba_port_num);
req->page_num = cpu_to_le32(page_num);
status = be_mcc_notify_wait(adapter);
- if (!status) {
+ if (!status && len > 0) {
struct be_cmd_resp_port_type *resp = cmd.va;
- memcpy(data, resp->page_data, PAGE_DATA_LEN);
+ memcpy(data, resp->page_data + off, len);
}
err:
mutex_unlock(&adapter->mcc_lock);
@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
switch (adapter->phy.interface_type) {
case PHY_TYPE_QSFP:
@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
strlcpy(adapter->phy.vendor_name, page_data +
SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index db1f3b908582..e2085c68c0ee 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
u32 *state);
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data);
+ u8 page_num, u32 off, u32 len, u8 *data);
int be_cmd_query_cable_type(struct be_adapter *adapter);
int be_cmd_query_sfp_info(struct be_adapter *adapter);
int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index dfa784339781..bd0df189d871 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev,
return -EOPNOTSUPP;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
modinfo->type = ETH_MODULE_SFF_8079;
@@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
{
struct be_adapter *adapter = netdev_priv(netdev);
int status;
+ u32 begin, end;
if (!check_privilege(adapter, MAX_PRIVILEGES))
return -EOPNOTSUPP;
- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- data);
- if (status)
- goto err;
+ begin = eeprom->offset;
+ end = eeprom->offset + eeprom->len;
+
+ if (begin < PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
+ min_t(u32, end, PAGE_DATA_LEN) - begin,
+ data);
+ if (status)
+ goto err;
+
+ data += PAGE_DATA_LEN - begin;
+ begin = PAGE_DATA_LEN;
+ }
- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
- status = be_cmd_read_port_transceiver_data(adapter,
- TR_PAGE_A2,
- data +
- PAGE_DATA_LEN);
+ if (end > PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
+ begin - PAGE_DATA_LEN,
+ end - begin, data);
if (status)
goto err;
}
- if (eeprom->offset)
- memcpy(data, data + eeprom->offset, eeprom->len);
err:
return be_cmd_status(status);
}
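
With the new offset and length arguments, be_get_module_eeprom() honours ethtool's requested window directly instead of always fetching a whole page and copying out of it afterwards: the span [offset, offset + len) is cut at PAGE_DATA_LEN (256 bytes in this driver) into an A0 portion and an A2 portion. A standalone sketch of that arithmetic, useful for convincing yourself the two firmware reads cover exactly the requested bytes:

#include <stdio.h>

#define PAGE_DATA_LEN 256u      /* size of the lower (A0) page window */

/* Mirror the split performed in be_get_module_eeprom() above. */
static void split_window(unsigned int off, unsigned int len)
{
        unsigned int begin = off, end = off + len;

        if (begin < PAGE_DATA_LEN) {
                unsigned int a0_end = end < PAGE_DATA_LEN ? end : PAGE_DATA_LEN;

                printf("A0: off=%u len=%u\n", begin, a0_end - begin);
                begin = PAGE_DATA_LEN;
        }
        if (end > PAGE_DATA_LEN)
                printf("A2: off=%u len=%u\n", begin - PAGE_DATA_LEN, end - begin);
}

int main(void)
{
        split_window(0, 512);   /* whole A0 plus whole A2 */
        split_window(100, 200); /* straddles the page boundary */
        split_window(300, 64);  /* lands entirely in A2 */
        return 0;
}
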
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
index 0f6a549b9f67..29a6c2ede43a 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
@@ -142,6 +142,7 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
int ref_ok, struct funeth_txq *xdp_q)
{
struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
struct xdp_buff xdp;
u32 act;
@@ -163,7 +164,9 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
case XDP_TX:
if (unlikely(!ref_ok))
goto pass;
- if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
+
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
goto xdp_error;
FUN_QSTAT_INC(q, xdp_tx);
q->xdp_flush |= FUN_XDP_FLUSH_TX;
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
index ff6e29237253..2f6698b98b03 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
@@ -466,7 +466,7 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
do {
fun_xdp_unmap(q, reclaim_idx);
- page_frag_free(q->info[reclaim_idx].vaddr);
+ xdp_return_frame(q->info[reclaim_idx].xdpf);
trace_funeth_tx_free(q, reclaim_idx, 1, head);
@@ -479,11 +479,11 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
return npkts;
}
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
{
struct fun_eth_tx_req *req;
struct fun_dataop_gl *gle;
- unsigned int idx;
+ unsigned int idx, len;
dma_addr_t dma;
if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
@@ -494,7 +494,8 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
return false;
}
- dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
+ len = xdpf->len;
+ dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
FUN_QSTAT_INC(q, tx_map_err);
return false;
@@ -514,7 +515,7 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
gle = (struct fun_dataop_gl *)req->dataop.imm;
fun_dataop_gl_init(gle, 0, 0, len, dma);
- q->info[idx].vaddr = data;
+ q->info[idx].xdpf = xdpf;
u64_stats_update_begin(&q->syncp);
q->stats.tx_bytes += len;
@@ -545,12 +546,9 @@ int fun_xdp_xmit_frames(struct net_device *dev, int n,
if (unlikely(q_idx >= fp->num_xdpqs))
return -ENXIO;
- for (q = xdpqs[q_idx], i = 0; i < n; i++) {
- const struct xdp_frame *xdpf = frames[i];
-
- if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
+ for (q = xdpqs[q_idx], i = 0; i < n; i++)
+ if (!fun_xdp_tx(q, frames[i]))
break;
- }
if (unlikely(flags & XDP_XMIT_FLUSH))
fun_txq_wr_db(q);
@@ -577,7 +575,7 @@ static void fun_xdpq_purge(struct funeth_txq *q)
unsigned int idx = q->cons_cnt & q->mask;
fun_xdp_unmap(q, idx);
- page_frag_free(q->info[idx].vaddr);
+ xdp_return_frame(q->info[idx].xdpf);
q->cons_cnt++;
}
}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
index 04c9f91b7489..8708e2895946 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
+++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
@@ -95,8 +95,8 @@ struct funeth_txq_stats { /* per Tx queue SW counters */
struct funeth_tx_info { /* per Tx descriptor state */
union {
- struct sk_buff *skb; /* associated packet */
- void *vaddr; /* start address for XDP */
+ struct sk_buff *skb; /* associated packet (sk_buff path) */
+ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */
};
};
@@ -245,7 +245,7 @@ static inline int fun_irq_node(const struct fun_irq *p)
int fun_rxq_napi_poll(struct napi_struct *napi, int budget);
int fun_txq_napi_poll(struct napi_struct *napi, int budget);
netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev);
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len);
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf);
int fun_xdp_xmit_frames(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags);
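
Passing an xdp_frame into fun_xdp_tx() lets the XDP_TX path and ndo_xdp_xmit share one descriptor bookkeeping format, and completions free buffers uniformly with xdp_return_frame() rather than page_frag_free(). A hedged sketch of the conversion step performed in the Rx hook; struct my_txq and xmit_frame() are placeholders, not the driver's API:

/* Convert the just-parsed xdp_buff into an xdp_frame before queuing it
 * for transmit, so the Tx reclaim path can use xdp_return_frame().
 */
static bool xdp_tx_one(struct my_txq *q, struct xdp_buff *xdp)
{
        struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);

        if (unlikely(!xdpf))
                return false;                   /* e.g. not enough headroom */

        if (!xmit_frame(q, xdpf)) {             /* hypothetical ring enqueue */
                xdp_return_frame(xdpf);         /* hand the frame back on failure */
                return false;
        }
        return true;
}
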
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 13382df2f2ef..bcf680e83811 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -630,7 +630,6 @@ struct e1000_phy_info {
bool disable_polarity_correction;
bool is_mdix;
bool polarity_correction;
- bool reset_disable;
bool speed_downgraded;
bool autoneg_wait_to_complete;
};
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index e6c8e6d5234f..9466f65a6da7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
bool blocked = false;
int i = 0;
- /* Check the PHY (LCD) reset flag */
- if (hw->phy.reset_disable)
- return true;
-
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
usleep_range(10000, 11000);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 638a3ddd7ada..2504b11c3169 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -271,7 +271,6 @@
#define I217_CGFREG_ENABLE_MTA_RESET 0x0002
#define I217_MEMPWR PHY_REG(772, 26)
#define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010
-#define I217_MEMPWR_MOEM 0x1000
/* Receive Address Initial CRC Calculation */
#define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fa06f68c8c80..f1729940e46c 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
hw->mac.type >= e1000_pch_adp) {
+ /* Keep the GPT clock enabled for CSME */
+ mac_data = er32(FEXTNVM);
+ mac_data |= BIT(3);
+ ew32(FEXTNVM, mac_data);
/* Request ME unconfigure the device from S0ix */
mac_data = er32(H2ME);
mac_data &= ~E1000_H2ME_START_DPG;
@@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data |= I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Disable LCD reset */
- hw->phy.reset_disable = true;
- }
-
e1000e_flush_lpic(pdev);
e1000e_pm_freeze(dev);
@@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
/* Introduce S0ix implementation */
@@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
if (rc)
return rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data &= ~I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Enable LCD reset */
- hw->phy.reset_disable = false;
- }
-
return e1000e_pm_thaw(dev);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index aa786fd55951..685556e968f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1925,11 +1925,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
* non-zero req_queue_pairs says that user requested a new
* queue count via ethtool's set_channels, so use this
* value for queues distribution across traffic classes
+ * We need at least one queue pair for the interface
+ * to be usable, as ensured by the else branch below.
*/
if (vsi->req_queue_pairs > 0)
vsi->num_queue_pairs = vsi->req_queue_pairs;
else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
vsi->num_queue_pairs = pf->num_lan_msix;
+ else
+ vsi->num_queue_pairs = 1;
}
/* Number of queues per enabled TC */
@@ -10650,7 +10654,7 @@ static int i40e_reset(struct i40e_pf *pf)
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
+ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
@@ -10658,13 +10662,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
int v;
if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- i40e_check_recovery_mode(pf)) {
+ is_recovery_mode_reported)
i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
- }
if (test_bit(__I40E_DOWN, pf->state) &&
- !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !old_recovery_mode_bit)
+ !test_bit(__I40E_RECOVERY_MODE, pf->state))
goto clear_recovery;
dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
@@ -10691,13 +10693,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
* accordingly with regard to resources initialization
* and deinitialization
*/
- if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
- old_recovery_mode_bit) {
+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
if (i40e_get_capabilities(pf,
i40e_aqc_opc_list_func_capabilities))
goto end_unlock;
- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+ if (is_recovery_mode_reported) {
/* we're staying in recovery mode so we'll reinitialize
* misc vector here
*/
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 49aed3e506a6..0ea0361cd86b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -64,7 +64,6 @@ struct iavf_vsi {
u16 id;
DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
int base_vector;
- u16 work_limit;
u16 qs_handle;
void *priv; /* client driver data reference. */
};
@@ -159,8 +158,12 @@ struct iavf_vlan {
struct iavf_vlan_filter {
struct list_head list;
struct iavf_vlan vlan;
- bool remove; /* filter needs to be removed */
- bool add; /* filter needs to be added */
+ struct {
+ u8 is_new_vlan:1; /* filter is new, wait for PF answer */
+ u8 remove:1; /* filter needs to be removed */
+ u8 add:1; /* filter needs to be added */
+ u8 padding:5;
+ };
};
#define IAVF_MAX_TRAFFIC_CLASS 4
@@ -461,6 +464,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state)
return "__IAVF_INIT_VERSION_CHECK";
case __IAVF_INIT_GET_RESOURCES:
return "__IAVF_INIT_GET_RESOURCES";
+ case __IAVF_INIT_EXTENDED_CAPS:
+ return "__IAVF_INIT_EXTENDED_CAPS";
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ return "__IAVF_INIT_CONFIG_ADAPTER";
case __IAVF_INIT_SW:
return "__IAVF_INIT_SW";
case __IAVF_INIT_FAILED:
@@ -520,6 +527,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter);
int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
void iavf_configure_queues(struct iavf_adapter *adapter);
void iavf_deconfigure_queues(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 3bb56714beb0..e535d4c3da49 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
struct iavf_ring *rx_ring, *tx_ring;
- ec->tx_max_coalesced_frames = vsi->work_limit;
- ec->rx_max_coalesced_frames = vsi->work_limit;
-
/* Rx and Tx usecs per queue value. If user doesn't specify the
* queue, return queue 0's value to represent.
*/
@@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
int i;
- if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
- vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
@@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
static const struct ethtool_ops iavf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = iavf_get_drvinfo,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index f3ecb3bca33d..2e2c153ce46a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter)
* iavf_get_num_vlans_added - get number of VLANs added
* @adapter: board private structure
*/
-static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
{
return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
@@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev,
if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
return -ENOMEM;
- if (proto == cpu_to_be16(ETH_P_8021Q))
- set_bit(vid, adapter->vsi.active_cvlans);
- else
- set_bit(vid, adapter->vsi.active_svlans);
-
return 0;
}
@@ -2245,7 +2240,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
adapter->vsi.back = adapter;
adapter->vsi.base_vector = 1;
- adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
vsi->netdev = adapter->netdev;
vsi->qs_handle = adapter->vsi_res->qset_handle;
if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2956,6 +2950,9 @@ continue_reset:
adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
iavf_misc_irq_enable(adapter);
+ bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
+ bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
+
mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 978f651c6b09..06d18797d25a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
struct iavf_tx_buffer *tx_buf;
struct iavf_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = vsi->work_limit;
+ unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
tx_buf = &tx_ring->tx_bi[i];
tx_desc = IAVF_TX_DESC(tx_ring, i);
@@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
{
struct iavf_rx_buffer *rx_buffer;
- if (!size)
- return NULL;
-
rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
prefetchw(rx_buffer->page);
+ if (!size)
+ return rx_buffer;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 782450d5c12f..1603e99bae4a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -627,6 +627,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter)
}
/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+ struct iavf_vlan_filter *f, *ftmp;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ if (f->vlan.tpid == ETH_P_8021Q)
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+
+ list_del(&f->list);
+ kfree(f);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
* iavf_add_vlans
* @adapter: adapter structure
*
@@ -683,6 +710,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vvfl->vlan_id[i] = f->vlan.vid;
i++;
f->add = false;
+ f->is_new_vlan = true;
if (i == count)
break;
}
@@ -695,10 +723,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
kfree(vvfl);
} else {
+ u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+ u16 current_vlans = iavf_get_num_vlans_added(adapter);
struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
+ if ((count + current_vlans) > max_vlans &&
+ current_vlans < max_vlans) {
+ count = max_vlans - iavf_get_num_vlans_added(adapter);
+ more = true;
+ }
+
len = sizeof(*vvfl_v2) + ((count - 1) *
sizeof(struct virtchnl_vlan_filter));
if (len > IAVF_MAX_AQ_BUF_SIZE) {
@@ -725,6 +761,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
+ if (i == count)
+ break;
+
/* give priority over outer if it's enabled */
if (filtering_support->outer)
vlan = &vvfl_v2->filters[i].outer;
@@ -736,8 +775,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
i++;
f->add = false;
- if (i == count)
- break;
+ f->is_new_vlan = true;
}
}
@@ -2080,6 +2118,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
*/
iavf_netdev_features_vlan_strip_set(netdev, true);
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ iavf_vlan_add_reject(adapter);
+ dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -2332,6 +2375,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2: {
+ struct iavf_vlan_filter *f;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ f->is_new_vlan = false;
+ if (f->vlan.tpid == ETH_P_8021Q)
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ }
+ break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
/* PF enabled vlan strip on this VF.
* Update netdev->features if needed to be in sync with ethtool.
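
The VLAN_V2 path now respects the PF's advertised filter limit: if adding all pending filters would exceed max_filters, only the remaining headroom is sent in this message, the rest stays queued (more = true), and filters the PF rejects are unwound by iavf_vlan_add_reject(). A small sketch of the clamp, using the same quantities as above but generic names:

/* Clamp a batch of new VLAN filters to the PF-advertised limit. */
static unsigned int clamp_vlan_batch(unsigned int pending,
                                     unsigned int current_vlans,
                                     unsigned int max_vlans,
                                     bool *more)
{
        if (current_vlans < max_vlans &&
            pending + current_vlans > max_vlans) {
                pending = max_vlans - current_vlans;    /* send only the headroom */
                *more = true;                           /* retry the remainder later */
        }
        return pending;
}
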
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 70335f6e8524..4efa5e5846e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -658,7 +658,8 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
rx_desc = ICE_RX_DESC(rx_ring, i);
if (!(rx_desc->wb.status_error0 &
- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
continue;
rx_buf = &rx_ring->rx_buf[i];
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index ff2eac2f8c64..9f02b60459f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4656,6 +4656,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_set_safe_mode_caps(hw);
}
+ hw->ucast_shared = true;
+
err = ice_init_pf(pf);
if (err) {
dev_err(dev, "ice_init_pf failed: %d\n", err);
@@ -6011,10 +6013,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
if (vsi->netdev) {
ice_set_rx_mode(vsi->netdev);
- err = ice_vsi_vlan_setup(vsi);
+ if (vsi->type != ICE_VSI_LB) {
+ err = ice_vsi_vlan_setup(vsi);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
}
ice_vsi_cfg_dcb_rings(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index bb1721f1321d..f4907a3c2d19 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1310,39 +1310,6 @@ out_put_vf:
}
/**
- * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
- * @pf: PF used to reference the switch's rules
- * @umac: unicast MAC to compare against existing switch rules
- *
- * Return true on the first/any match, else return false
- */
-static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
-{
- struct ice_sw_recipe *mac_recipe_list =
- &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
- struct ice_fltr_mgmt_list_entry *list_itr;
- struct list_head *rule_head;
- struct mutex *rule_lock; /* protect MAC filter list access */
-
- rule_head = &mac_recipe_list->filt_rules;
- rule_lock = &mac_recipe_list->filt_rule_lock;
-
- mutex_lock(rule_lock);
- list_for_each_entry(list_itr, rule_head, list_entry) {
- u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0];
-
- if (ether_addr_equal(existing_mac, umac)) {
- mutex_unlock(rule_lock);
- return true;
- }
- }
-
- mutex_unlock(rule_lock);
-
- return false;
-}
-
-/**
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -1376,13 +1343,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
if (ret)
goto out_put_vf;
- if (ice_unicast_mac_exists(pf, mac)) {
- netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
- mac, vf_id, mac);
- ret = -EINVAL;
- goto out_put_vf;
- }
-
mutex_lock(&vf->cfg_lock);
/* VF is notified of its new MAC via the PF's response to the
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 3f8b7274ed2f..836dce840712 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1751,11 +1751,13 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
protocol = vlan_get_protocol(skb);
- if (eth_p_mpls(protocol))
+ if (eth_p_mpls(protocol)) {
ip.hdr = skb_inner_network_header(skb);
- else
+ l4.hdr = skb_checksum_start(skb);
+ } else {
ip.hdr = skb_network_header(skb);
- l4.hdr = skb_checksum_start(skb);
+ l4.hdr = skb_transport_header(skb);
+ }
/* compute outer L2 header size */
l2_len = ip.hdr - skb->data;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 4547bc1f7cee..24188ec594d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -2948,7 +2948,8 @@ ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi,
struct virtchnl_vlan_filtering_caps *vfc,
struct virtchnl_vlan_filter_list_v2 *vfl)
{
- u16 num_requested_filters = vsi->num_vlan + vfl->num_elements;
+ u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) +
+ vfl->num_elements;
if (num_requested_filters > vfc->max_filters)
return false;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ae17af44fe02..a5ebee7df4a8 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6171,6 +6171,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
u32 value = 0;
+ if (IGC_REMOVED(hw_addr))
+ return ~value;
+
value = readl(&hw_addr[reg]);
/* reads should not return all F's */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index e197a33d93a0..026c3b65fc37 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
#define wr32(reg, val) \
do { \
u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
- writel((val), &hw_addr[(reg)]); \
+ if (!IGC_REMOVED(hw_addr)) \
+ writel((val), &hw_addr[(reg)]); \
} while (0)
#define rd32(reg) (igc_rd32(hw, reg))
@@ -318,4 +319,6 @@ do { \
#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+#define IGC_REMOVED(h) unlikely(!(h))
+
#endif
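
igc register accesses now tolerate a surprise removal: once hw->hw_addr has been cleared, reads return all-ones (the same value a dead PCIe device would produce) and writes are silently dropped instead of dereferencing a NULL __iomem pointer. A generic sketch of the guarded accessor pair; struct my_hw is a placeholder:

/* MMIO accessors that stay safe after the device has gone away. */
static u32 guarded_rd32(struct my_hw *hw, u32 reg)
{
        u8 __iomem *base = READ_ONCE(hw->hw_addr);

        if (unlikely(!base))
                return ~0U;                     /* device removed */
        return readl(base + reg);
}

static void guarded_wr32(struct my_hw *hw, u32 reg, u32 val)
{
        u8 __iomem *base = READ_ONCE(hw->hw_addr);

        if (likely(base))
                writel(val, base + reg);        /* drop writes once removed */
}
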
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 921a4d977d65..8813b4dd6872 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -779,6 +779,7 @@ struct ixgbe_adapter {
#ifdef CONFIG_IXGBE_IPSEC
struct ixgbe_ipsec *ipsec;
#endif /* CONFIG_IXGBE_IPSEC */
+ spinlock_t vfs_lock;
};
static inline int ixgbe_determine_xdp_q_idx(int cpu)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 77c2e70b0860..55f91c9ff047 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
/* n-tuple support exists, always init our spinlock */
spin_lock_init(&adapter->fdir_perfect_lock);
+ /* init spinlock to serialize access to VF resources */
+ spin_lock_init(&adapter->vfs_lock);
+
#ifdef CONFIG_IXGBE_DCB
ixgbe_init_dcb(adapter);
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index d4e63f0644c3..a1e69c734863 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
{
unsigned int num_vfs = adapter->num_vfs, vf;
+ unsigned long flags;
int rss;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
/* set num VFs to 0 to prevent access to vfinfo */
adapter->num_vfs = 0;
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
/* put the reference to all of the vf devices */
for (vf = 0; vf < num_vfs; ++vf) {
@@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
void ixgbe_msg_task(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ unsigned long flags;
u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
for (vf = 0; vf < adapter->num_vfs; vf++) {
/* process any reset requests */
if (!ixgbe_check_for_rst(hw, vf))
@@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
if (!ixgbe_check_for_ack(hw, vf))
ixgbe_rcv_ack_from_vf(adapter, vf);
}
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
}
static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 28b19945d716..e64318c110fd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -28,6 +28,9 @@
#define MAX_RATE_EXPONENT 0x0FULL
#define MAX_RATE_MANTISSA 0xFFULL
+#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
+#define CN10K_MAX_BURST_SIZE 8453888ULL
+
/* Bitfields in NIX_TLX_PIR register */
#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
@@ -35,6 +38,9 @@
#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
+#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
+
struct otx2_tc_flow_stats {
u64 bytes;
u64 pkts;
@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
}
EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
- u32 *burst_mantissa)
+static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
+ u32 *burst_exp, u32 *burst_mantissa)
{
+ int max_burst, max_mantissa;
unsigned int tmp;
+ if (is_dev_otx2(nic->pdev)) {
+ max_burst = MAX_BURST_SIZE;
+ max_mantissa = MAX_BURST_MANTISSA;
+ } else {
+ max_burst = CN10K_MAX_BURST_SIZE;
+ max_mantissa = CN10K_MAX_BURST_MANTISSA;
+ }
+
/* Burst is calculated as
* ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
* Max supported burst size is 130,816 bytes.
*/
- burst = min_t(u32, burst, MAX_BURST_SIZE);
+ burst = min_t(u32, burst, max_burst);
if (burst) {
*burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
tmp = burst - rounddown_pow_of_two(burst);
- if (burst < MAX_BURST_MANTISSA)
+ if (burst < max_mantissa)
*burst_mantissa = tmp * 2;
else
*burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
} else {
*burst_exp = MAX_BURST_EXPONENT;
- *burst_mantissa = MAX_BURST_MANTISSA;
+ *burst_mantissa = max_mantissa;
}
}
-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
u32 *mantissa, u32 *div_exp)
{
- unsigned int tmp;
+ u64 tmp;
/* Rate calculation by hardware
*
@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
}
}
-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
+static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
+ u64 maxrate, u32 burst)
{
- struct otx2_hw *hw = &nic->hw;
- struct nix_txschq_config *req;
u32 burst_exp, burst_mantissa;
u32 exp, mantissa, div_exp;
+ u64 regval = 0;
+
+ /* Get exponent and mantissa values from the desired rate */
+ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
+ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+
+ if (is_dev_otx2(nic->pdev)) {
+ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ } else {
+ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ }
+
+ return regval;
+}
+
+static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
+ u32 burst, u64 maxrate)
+{
+ struct otx2_hw *hw = &nic->hw;
+ struct nix_txschq_config *req;
int txschq, err;
/* All SQs share the same TL4, so pick the first scheduler */
txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
- /* Get exponent and mantissa values from the desired rate */
- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
-
mutex_lock(&nic->mbox.lock);
req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
if (!req) {
@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
req->lvl = NIX_TXSCH_LVL_TL4;
req->num_regs = 1;
req->reg[0] = NIX_AF_TL4X_PIR(txschq);
- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
err = otx2_sync_mbox_msg(&nic->mbox);
mutex_unlock(&nic->mbox.lock);
@@ -230,7 +264,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
struct netlink_ext_ack *extack = cls->common.extack;
struct flow_action *actions = &cls->rule->action;
struct flow_action_entry *entry;
- u32 rate;
+ u64 rate;
int err;
err = otx2_tc_validate_flow(nic, actions, extack);
@@ -256,7 +290,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
}
/* Convert bytes per second to Mbps */
rate = entry->police.rate_bytes_ps * 8;
- rate = max_t(u32, rate / 1000000, 1);
+ rate = max_t(u64, rate / 1000000, 1);
err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
if (err)
return err;
@@ -614,21 +648,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
flow_spec->dport = match.key->dst;
flow_mask->dport = match.mask->dst;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_DPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_DPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_DPORT_SCTP);
+
+ if (flow_mask->dport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_DPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_DPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_DPORT_SCTP);
+ }
flow_spec->sport = match.key->src;
flow_mask->sport = match.mask->src;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_SPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_SPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_SPORT_SCTP);
+
+ if (flow_mask->sport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_SPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_SPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_SPORT_SCTP);
+ }
}
return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
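
The otx2 policer changes widen the burst mantissa for CN10K (15 bits instead of 8) and move the register packing into otx2_get_txschq_rate_regval(), which picks the field layout per silicon. The encoding follows the formula quoted in the driver comment, burst = ((256 + mantissa) << (1 + exponent)) / 256, so the maxima line up with the new defines. A standalone decoder makes that easy to check; the field widths below are taken from the GENMASK definitions above:

#include <stdio.h>

/* Decode a burst size from its exponent/mantissa pair. */
static unsigned long long decode_burst(unsigned int exp, unsigned long long mantissa)
{
        return ((256ULL + mantissa) << (1 + exp)) / 256;
}

int main(void)
{
        /* OTX2: 4-bit exponent, 8-bit mantissa   -> 130,816 bytes max */
        printf("otx2 max burst:  %llu\n", decode_burst(0xf, 0xff));
        /* CN10K: 4-bit exponent, 15-bit mantissa -> 8,453,888 bytes max */
        printf("cn10k max burst: %llu\n", decode_burst(0xf, 0x7fff));
        return 0;
}
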
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
index d43e503c644f..4d93ad6a284c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -167,12 +167,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
}
port = netdev_priv(ingress_dev);
- mask = htons(0x1FFF);
- key = htons(port->hw_id);
+ mask = htons(0x1FFF << 3);
+ key = htons(port->hw_id << 3);
rule_match_set(r_match->key, SYS_PORT, key);
rule_match_set(r_match->mask, SYS_PORT, mask);
- mask = htons(0x1FF);
+ mask = htons(0x3FF);
key = htons(port->dev_id);
rule_match_set(r_match->key, SYS_DEV, key);
rule_match_set(r_match->mask, SYS_DEV, mask);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 90e7dfd011c9..5d457bc9acc1 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
};
struct net_device_path path = {};
+ if (!ctx.dev)
+ return -ENODEV;
+
memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 8f0cd3196aac..29be2fcafea3 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
* WDMA RX.
*/
- BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0d8a0068e4ca..ce33dbde124d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5384,7 +5384,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
{
const struct fib_nh *nh = fib_info_nh(fi, 0);
- return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ return nh->fib_nh_gw_family ||
mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
@@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
unsigned long *fields = config->fields;
u32 hash_fields;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
mlxsw_sp_mp4_hash_outer_addr(config);
break;
@@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_mp_hash_inner_l3(config);
break;
case 3:
- hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
/* Outer */
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
@@ -10523,13 +10523,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
struct net *net = mlxsw_sp_net(mlxsw_sp);
- bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
u64 max_rifs;
+ bool usp;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+ usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 005e56ea5da1..5893770bfd94 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
unsigned int vid,
enum macaccess_entry_type type)
{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
lan966x_mac_select(lan966x, mac, vid);
/* Issue a write command */
@@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
lan966x, ANA_MACACCESS);
- return lan966x_mac_wait_for_completion(lan966x);
+ ret = lan966x_mac_wait_for_completion(lan966x);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
}
/* The mask of the front ports is encoded inside the mac parameter via a call
@@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port,
return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
}
-int lan966x_mac_forget(struct lan966x *lan966x,
- const unsigned char mac[ETH_ALEN],
- unsigned int vid,
- enum macaccess_entry_type type)
+static int lan966x_mac_forget_locked(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
{
+ lockdep_assert_held(&lan966x->mac_lock);
+
lan966x_mac_select(lan966x, mac, vid);
/* Issue a forget command */
@@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x,
return lan966x_mac_wait_for_completion(lan966x);
}
+int lan966x_mac_forget(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
+ ret = lan966x_mac_forget_locked(lan966x, mac, vid, type);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
+}
+
int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid)
{
return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED);
@@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
{
struct lan966x_mac_entry *mac_entry;
- mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL);
+ mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC);
if (!mac_entry)
return NULL;
@@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
struct lan966x_mac_entry *res = NULL;
struct lan966x_mac_entry *mac_entry;
- spin_lock(&lan966x->mac_lock);
list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
if (mac_entry->vid == vid &&
ether_addr_equal(mac, mac_entry->mac) &&
@@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
break;
}
}
- spin_unlock(&lan966x->mac_lock);
return res;
}
@@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
{
struct lan966x_mac_entry *mac_entry;
- if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL))
+ spin_lock(&lan966x->mac_lock);
+ if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) {
+ spin_unlock(&lan966x->mac_lock);
return 0;
+ }
/* In case the entry already exists, don't add it again to SW,
* just update HW, but we need to look in the actual HW because
@@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
* add the entry but without the extern_learn flag.
*/
mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port);
- if (mac_entry)
- return lan966x_mac_learn(lan966x, port->chip_port,
- addr, vid, ENTRYTYPE_LOCKED);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ goto mac_learn;
+ }
mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return -ENOMEM;
+ }
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
- lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+mac_learn:
+ lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
+
return 0;
}
@@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
list) {
if (mac_entry->vid == vid &&
ether_addr_equal(addr, mac_entry->mac)) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x)
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
list) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid, ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
{
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
+ struct list_head mac_deleted_entries;
u32 dest_idx;
u32 column;
u16 vid;
+ INIT_LIST_HEAD(&mac_deleted_entries);
+
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) {
bool found = false;
@@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
}
if (!found) {
- /* Notify the bridge that the entry doesn't exist
- * anymore in the HW and remove the entry from the SW
- * list
- */
- lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
- mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
-
list_del(&mac_entry->list);
- kfree(mac_entry);
+ /* Move the entry from SW list to a tmp list such that
+ * it would be deleted later
+ */
+ list_add_tail(&mac_entry->list, &mac_deleted_entries);
}
}
spin_unlock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) {
+ /* Notify the bridge that the entry doesn't exist
+ * anymore in the HW
+ */
+ lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+ mac_entry->mac, mac_entry->vid,
+ lan966x->ports[mac_entry->port_index]->dev);
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+
/* Now go to the list of columns and see if any entry was not in the SW
* list, then that means that the entry is new so it needs to notify the
* bridge.
@@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
+ spin_lock(&lan966x->mac_lock);
+ mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ continue;
+ }
+
mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return;
+ }
mac_entry->row = row;
-
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
@@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
lan966x, ANA_MACTINDX);
while (1) {
+ spin_lock(&lan966x->mac_lock);
lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT),
ANA_MACACCESS_MAC_TABLE_CMD,
lan966x, ANA_MACACCESS);
@@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
stop = false;
if (column == LAN966X_MAC_COLUMNS - 1 &&
- index == 0 && stop)
+ index == 0 && stop) {
+ spin_unlock(&lan966x->mac_lock);
break;
+ }
entry[column].mach = lan_rd(lan966x, ANA_MACHDATA);
entry[column].macl = lan_rd(lan966x, ANA_MACLDATA);
entry[column].maca = lan_rd(lan966x, ANA_MACACCESS);
+ spin_unlock(&lan966x->mac_lock);
/* Once all the columns are read process them */
if (column == LAN966X_MAC_COLUMNS - 1) {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index e31f8fbbc696..df2ab5cbd49b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -4233,7 +4233,7 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
}
/* If the chain is ended by an load/store pair then this
- * could serve as the new head of the the next chain.
+ * could serve as the new head of the next chain.
*/
if (curr_pair_is_memcpy(meta1, meta2)) {
head_ld_meta = meta1;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 0147de405365..ffb6f6d05a07 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
ip_rt_put(rt);
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
}
}
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 4625f85acab2..10ad0b93d283 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
if (tx_queue && tx_queue->timestamping) {
+ /* This code invokes normal driver TX code which is always
+ * protected from softirqs when called from generic TX code,
+ * which in turn disables preemption. Look at __dev_queue_xmit
+ * which uses rcu_read_lock_bh disabling preemption for RCU
+ * plus disabling softirqs. We do not need RCU reader
+ * protection here.
+ *
+ * Although it is theoretically safe for the current PTP TX/RX code
+ * to run without disabling softirqs, there are three good
+ * reasons for doing so:
+ *
+ * 1) The code invoked is mainly implemented for non-PTP
+ * packets and it is always executed with softirqs
+ * disabled.
+ * 2) This being a single PTP packet, better to not
+ * interrupt its processing by softirqs which can lead
+ * to high latencies.
+ * 3) netdev_xmit_more checks preemption is disabled and
+ * triggers a BUG_ON if not.
+ */
+ local_bh_disable();
efx_enqueue_skb(tx_queue, skb);
+ local_bh_enable();
} else {
WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 38fe77d1035e..3fe720c5dc9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
*art_time = ns;
}
+static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
+{
+ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
+}
+
static int intel_crosststamp(ktime_t *device,
struct system_counterval_t *system,
void *ctx)
@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device,
u32 num_snapshot;
u32 gpio_value;
u32 acr_value;
- int ret;
- u32 v;
int i;
if (!boot_cpu_has(X86_FEATURE_ART))
@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device,
if (priv->plat->ext_snapshot_en)
return -EBUSY;
+ priv->plat->int_snapshot_en = 1;
+
mutex_lock(&priv->aux_ts_lock);
/* Enable Internal snapshot trigger */
acr_value = readl(ptpaddr + PTP_ACR);
@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device,
break;
default:
mutex_unlock(&priv->aux_ts_lock);
+ priv->plat->int_snapshot_en = 0;
return -EINVAL;
}
writel(acr_value, ptpaddr + PTP_ACR);
@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device,
gpio_value |= GMAC_GPO1;
writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
- /* Poll for time sync operation done */
- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
- (v & GMAC_INT_TSIE), 100, 10000);
-
- if (ret == -ETIMEDOUT) {
- pr_err("%s: Wait for time sync operation timeout\n", __func__);
- return ret;
+ /* Time sync done Indication - Interrupt method */
+ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
+ stmmac_cross_ts_isr(priv),
+ HZ / 100)) {
+ priv->plat->int_snapshot_en = 0;
+ return -ETIMEDOUT;
}
num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device,
}
system->cycles *= intel_priv->crossts_adj;
+ priv->plat->int_snapshot_en = 0;
return 0;
}
@@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->has_crossts = true;
plat->crosststamp = intel_crosststamp;
+ plat->int_snapshot_en = 0;
/* Setup MSI vector offset specific to Intel mGbE controller */
plat->msi_mac_vec = 29;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 6ff88df58767..d42e1afb6521 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
}
}
- ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
- if (ret) {
- dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
- return ret;
- }
-
- ret = clk_prepare_enable(plat->rmii_internal_clk);
- if (ret) {
- dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret);
- goto err_clk;
- }
-
return 0;
-
-err_clk:
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
- return ret;
-}
-
-static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
-{
- struct mediatek_dwmac_plat_data *plat = priv;
- const struct mediatek_dwmac_variant *variant = plat->variant;
-
- clk_disable_unprepare(plat->rmii_internal_clk);
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
}
static int mediatek_dwmac_clks_config(void *priv, bool enabled)
@@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->addr64 = priv_plat->variant->dma_bit_mask;
plat->bsp_priv = priv_plat;
plat->init = mediatek_dwmac_init;
- plat->exit = mediatek_dwmac_exit;
plat->clks_config = mediatek_dwmac_clks_config;
if (priv_plat->variant->dwmac_fix_mac_speed)
plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
@@ -712,13 +686,33 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
mediatek_dwmac_init(pdev, priv_plat);
+ ret = mediatek_dwmac_clks_config(priv_plat, true);
+ if (ret)
+ goto err_remove_config_dt;
+
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret) {
- stmmac_remove_config_dt(pdev, plat_dat);
- return ret;
- }
+ if (ret)
+ goto err_drv_probe;
return 0;
+
+err_drv_probe:
+ mediatek_dwmac_clks_config(priv_plat, false);
+err_remove_config_dt:
+ stmmac_remove_config_dt(pdev, plat_dat);
+
+ return ret;
+}
+
+static int mediatek_dwmac_remove(struct platform_device *pdev)
+{
+ struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
+ int ret;
+
+ ret = stmmac_pltfr_remove(pdev);
+ mediatek_dwmac_clks_config(priv_plat, false);
+
+ return ret;
}
static const struct of_device_id mediatek_dwmac_match[] = {
@@ -733,7 +727,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
static struct platform_driver mediatek_dwmac_driver = {
.probe = mediatek_dwmac_probe,
- .remove = stmmac_pltfr_remove,
+ .remove = mediatek_dwmac_remove,
.driver = {
.name = "dwmac-mediatek",
.pm = &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 462ca7ed095a..71dad409f78b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -150,7 +150,8 @@
#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
GMAC_INT_PCS_ANE)
-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
+#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
+ GMAC_INT_TSIE)
enum dwmac4_irq_status {
time_stamp_irq = 0x00001000,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index fd41db65fe1d..d8f1fbc25bdd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -23,6 +23,7 @@
static void dwmac4_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
@@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw,
value |= GMAC_INT_FPE_EN;
writel(value, ioaddr + GMAC_INT_EN);
+
+ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
+ init_waitqueue_head(&priv->tstamp_busy_wait);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
if (queue == 0 || queue == 4) {
value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+ } else if (queue > 4) {
+ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
+ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
} else {
value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 57970ae2178d..f9e83964aa7e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -266,6 +266,7 @@ struct stmmac_priv {
rwlock_t ptp_lock;
/* Protects auxiliary snapshot registers from concurrent access. */
struct mutex aux_ts_lock;
+ wait_queue_head_t tstamp_busy_wait;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index abfb3cd5958d..9c3055ee2608 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
netdev_warn(priv->dev,
"Setting EEE tx-lpi is not supported\n");
- if (priv->hw->xpcs) {
- ret = xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- edata->eee_enabled);
- if (ret)
- return ret;
- }
-
if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 92d32940aff0..764832f4dae1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
u64 ptp_time;
int i;
+ if (priv->plat->int_snapshot_en) {
+ wake_up(&priv->tstamp_busy_wait);
+ return;
+ }
+
tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
if (!tsync_int)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d1a7cf4567bc..c5f33630e771 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
struct timespec64 now;
u32 sec_inc = 0;
u64 temp = 0;
- int ret;
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
- ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
- if (ret < 0) {
- netdev_warn(priv->dev,
- "failed to enable PTP reference clock: %pe\n",
- ERR_PTR(ret));
- return ret;
- }
-
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
@@ -3270,6 +3261,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
stmmac_mmc_setup(priv);
+ if (ptp_register) {
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0)
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ }
+
ret = stmmac_init_ptp(priv);
if (ret == -EOPNOTSUPP)
netdev_info(priv->dev, "PTP not supported by HW\n");
@@ -7213,8 +7212,6 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
pm_runtime_get_sync(dev);
- pm_runtime_disable(dev);
- pm_runtime_put_noidle(dev);
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
@@ -7241,6 +7238,9 @@ int stmmac_dvr_remove(struct device *dev)
mutex_destroy(&priv->lock);
bitmap_free(priv->af_xdp_zc_qps);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
return 0;
}
EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 11e1055e8260..9f5cac4000da 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
if (ret)
return ret;
- stmmac_init_tstamp_counter(priv, priv->systime_flags);
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0) {
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index e45fb191d8e6..4d11980dcd64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
struct stmmac_priv *priv =
container_of(ptp, struct stmmac_priv, ptp_clock_ops);
void __iomem *ptpaddr = priv->ptpaddr;
- void __iomem *ioaddr = priv->hw->pcsr;
struct stmmac_pps_cfg *cfg;
- u32 intr_value, acr_value;
int ret = -EOPNOTSUPP;
unsigned long flags;
+ u32 acr_value;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
@@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Enable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value |= GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
-
} else {
netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Disable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value &= ~GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
}
writel(acr_value, ptpaddr + PTP_ACR);
mutex_unlock(&priv->aux_ts_lock);
diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h
index 3233d145fd87..495e85abe50b 100644
--- a/drivers/net/ipa/ipa_qmi_msg.h
+++ b/drivers/net/ipa/ipa_qmi_msg.h
@@ -214,7 +214,7 @@ struct ipa_init_modem_driver_req {
/* The response to a IPA_QMI_INIT_DRIVER request begins with a standard
* QMI response, but contains other information as well. Currently we
- * simply wait for the the INIT_DRIVER transaction to complete and
+ * simply wait for the INIT_DRIVER transaction to complete and
* ignore any other data that might be returned.
*/
struct ipa_init_modem_driver_rsp {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 817577e713d7..f354fad05714 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -243,6 +243,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
#define DEFAULT_SEND_SCI true
#define DEFAULT_ENCRYPT false
#define DEFAULT_ENCODING_SA 0
+#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
static bool send_sci(const struct macsec_secy *secy)
{
@@ -1697,7 +1698,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
return false;
if (attrs[MACSEC_SA_ATTR_PN] &&
- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -1753,7 +1754,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
}
pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ if (tb_sa[MACSEC_SA_ATTR_PN] &&
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
rtnl_unlock();
@@ -1769,7 +1771,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
@@ -1842,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
return 0;
cleanup:
- kfree(rx_sa);
+ macsec_rxsa_put(rx_sa);
rtnl_unlock();
return err;
}
@@ -1939,7 +1941,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -2011,7 +2013,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
@@ -2085,7 +2087,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
cleanup:
secy->operational = was_operational;
- kfree(tx_sa);
+ macsec_txsa_put(tx_sa);
rtnl_unlock();
return err;
}
@@ -2293,7 +2295,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -3745,9 +3747,6 @@ static int macsec_changelink_common(struct net_device *dev,
secy->operational = tx_sa && tx_sa->active;
}
- if (data[IFLA_MACSEC_WINDOW])
- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
-
if (data[IFLA_MACSEC_ENCRYPT])
tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
@@ -3793,6 +3792,16 @@ static int macsec_changelink_common(struct net_device *dev,
}
}
+ if (data[IFLA_MACSEC_WINDOW]) {
+ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+
+ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
+ * for XPN cipher suites */
+ if (secy->xpn &&
+ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -3822,7 +3831,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
ret = macsec_changelink_common(dev, data);
if (ret)
- return ret;
+ goto cleanup;
/* If h/w offloading is available, propagate to the device */
if (macsec_is_offloaded(macsec)) {
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 4cfd05c15aee..d25fbb9caeba 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -896,7 +896,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
*/
ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
if (ret < 0)
- return false;
+ return ret;
if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
int speed_value;
diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
index ff22b6b1c686..36803d932dff 100644
--- a/drivers/net/sungem_phy.c
+++ b/drivers/net/sungem_phy.c
@@ -450,6 +450,7 @@ static int bcm5421_init(struct mii_phy* phy)
int can_low_power = 1;
if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
can_low_power = 0;
+ of_node_put(np);
if (can_low_power) {
/* Enable automatic low-power */
sungem_phy_write(phy, 0x1c, 0x9002);
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b082819509e1..0f6efaabaa32 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
#define NETNEXT_VERSION "12"
/* Information for net */
-#define NET_VERSION "12"
+#define NET_VERSION "13"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp)
wait_oob_link_list_ready(tp);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
switch (tp->version) {
case RTL_VER_03:
@@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
@@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp)
rtl_disable(tp);
rtl_reset_bmu(tp);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
/* Clear teredo wake event. bit[15:8] is the teredo wakeup
* type. Set it to zero. bits[7:0] are the W1C bits about
* the events. Set them to all 1 to clear them.
@@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp)
ocp_data |= NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rtl_rx_vlan_en(tp, true);
rxdy_gated_en(tp, false);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 356cf8dd4164..ec8e1b3108c3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -242,9 +242,15 @@ struct virtnet_info {
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for refilling if we run low on memory. */
+ /* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
+ /* Is delayed refill enabled? */
+ bool refill_enabled;
+
+ /* The lock to synchronize the access to refill_enabled */
+ spinlock_t refill_lock;
+
/* Work struct for config space updates */
struct work_struct config_work;
@@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
return p;
}
+static void enable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = true;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
+static void disable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = false;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
@@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
}
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
- schedule_delayed_work(&vi->refill, 0);
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+ spin_lock(&vi->refill_lock);
+ if (vi->refill_enabled)
+ schedule_delayed_work(&vi->refill, 0);
+ spin_unlock(&vi->refill_lock);
+ }
}
u64_stats_update_begin(&rq->stats.syncp);
@@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
+ enable_delayed_refill(vi);
+
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
@@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i;
+ /* Make sure NAPI doesn't schedule refill work */
+ disable_delayed_refill(vi);
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
@@ -2792,6 +2820,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
+ enable_delayed_refill(vi);
+
if (netif_running(vi->dev)) {
err = virtnet_open(vi->dev);
if (err)
@@ -3535,6 +3565,7 @@ static int virtnet_probe(struct virtio_device *vdev)
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ spin_lock_init(&vi->refill_lock);
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 58c72d55769a..73d9fcba3b1c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3515,6 +3515,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index f52960d2dfbe..bff144c97e66 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -32,7 +32,7 @@ config DEBUG_PINCTRL
Say Y here to add some extra checks and diagnostics to PINCTRL calls.
config PINCTRL_AMD
- tristate "AMD GPIO pin control"
+ bool "AMD GPIO pin control"
depends on HAS_IOMEM
depends on ACPI || COMPILE_TEST
select GPIOLIB
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index a140b6bfbfaa..bcde042d29dc 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -102,7 +102,7 @@ struct armada_37xx_pinctrl {
struct device *dev;
struct gpio_chip gpio_chip;
struct irq_chip irq_chip;
- spinlock_t irq_lock;
+ raw_spinlock_t irq_lock;
struct pinctrl_desc pctl;
struct pinctrl_dev *pctl_dev;
struct armada_37xx_pin_group *groups;
@@ -523,9 +523,9 @@ static void armada_37xx_irq_ack(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
writel(d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_mask(struct irq_data *d)
@@ -536,10 +536,10 @@ static void armada_37xx_irq_mask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val & ~d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_unmask(struct irq_data *d)
@@ -550,10 +550,10 @@ static void armada_37xx_irq_unmask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val | d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on)
@@ -564,14 +564,14 @@ static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
if (on)
val |= (BIT(d->hwirq % GPIO_PER_REG));
else
val &= ~(BIT(d->hwirq % GPIO_PER_REG));
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
@@ -583,7 +583,7 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type)
u32 val, reg = IRQ_POL;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
armada_37xx_irq_update_reg(&reg, d);
val = readl(info->base + reg);
switch (type) {
@@ -607,11 +607,11 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type)
break;
}
default:
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return -EINVAL;
}
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
@@ -626,7 +626,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info,
regmap_read(info->regmap, INPUT_VAL + 4*reg_idx, &l);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
p = readl(info->base + IRQ_POL + 4 * reg_idx);
if ((p ^ l) & (1 << bit_num)) {
/*
@@ -647,7 +647,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info,
ret = -1;
}
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return ret;
}
@@ -664,11 +664,11 @@ static void armada_37xx_irq_handler(struct irq_desc *desc)
u32 status;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base + IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
while (status) {
u32 hwirq = ffs(status) - 1;
u32 virq = irq_find_mapping(d, hwirq +
@@ -695,12 +695,12 @@ static void armada_37xx_irq_handler(struct irq_desc *desc)
update_status:
/* Update status in case a new IRQ appears */
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base +
IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
}
chained_irq_exit(chip, desc);
@@ -731,7 +731,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev,
struct device *dev = &pdev->dev;
unsigned int i, nr_irq_parent;
- spin_lock_init(&info->irq_lock);
+ raw_spin_lock_init(&info->irq_lock);
nr_irq_parent = of_irq_count(np);
if (!nr_irq_parent) {
@@ -1107,25 +1107,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = {
{ },
};
+static const struct regmap_config armada_37xx_pinctrl_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .use_raw_spinlock = true,
+};
+
static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev)
{
struct armada_37xx_pinctrl *info;
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
struct regmap *regmap;
+ void __iomem *base;
int ret;
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ if (IS_ERR(base)) {
+ dev_err(dev, "failed to ioremap base address: %pe\n", base);
+ return PTR_ERR(base);
+ }
+
+ regmap = devm_regmap_init_mmio(dev, base,
+ &armada_37xx_pinctrl_regmap_config);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to create regmap: %pe\n", regmap);
+ return PTR_ERR(regmap);
+ }
+
info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
info->dev = dev;
-
- regmap = syscon_node_to_regmap(np);
- if (IS_ERR(regmap))
- return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n");
info->regmap = regmap;
-
info->data = of_device_get_match_data(dev);
ret = armada_37xx_pinctrl_register(pdev, info);
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 5f4a8c5c6650..dfc8ea9f3843 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -29,19 +29,12 @@
#define ocelot_clrsetbits(addr, clear, set) \
writel((readl(addr) & ~(clear)) | (set), (addr))
-/* PINCONFIG bits (sparx5 only) */
enum {
PINCONF_BIAS,
PINCONF_SCHMITT,
PINCONF_DRIVE_STRENGTH,
};
-#define BIAS_PD_BIT BIT(4)
-#define BIAS_PU_BIT BIT(3)
-#define BIAS_BITS (BIAS_PD_BIT|BIAS_PU_BIT)
-#define SCHMITT_BIT BIT(2)
-#define DRIVE_BITS GENMASK(1, 0)
-
/* GPIO standard registers */
#define OCELOT_GPIO_OUT_SET 0x0
#define OCELOT_GPIO_OUT_CLR 0x4
@@ -321,6 +314,13 @@ struct ocelot_pin_caps {
unsigned char a_functions[OCELOT_FUNC_PER_PIN]; /* Additional functions */
};
+struct ocelot_pincfg_data {
+ u8 pd_bit;
+ u8 pu_bit;
+ u8 drive_bits;
+ u8 schmitt_bit;
+};
+
struct ocelot_pinctrl {
struct device *dev;
struct pinctrl_dev *pctl;
@@ -328,10 +328,16 @@ struct ocelot_pinctrl {
struct regmap *map;
struct regmap *pincfg;
struct pinctrl_desc *desc;
+ const struct ocelot_pincfg_data *pincfg_data;
struct ocelot_pmx_func func[FUNC_MAX];
u8 stride;
};
+struct ocelot_match_data {
+ struct pinctrl_desc desc;
+ struct ocelot_pincfg_data pincfg_data;
+};
+
#define LUTON_P(p, f0, f1) \
static struct ocelot_pin_caps luton_pin_##p = { \
.pin = p, \
@@ -1325,24 +1331,27 @@ static int ocelot_hw_get_value(struct ocelot_pinctrl *info,
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 regcfg;
- ret = regmap_read(info->pincfg, pin, &regcfg);
+ ret = regmap_read(info->pincfg,
+ pin * regmap_get_reg_stride(info->pincfg),
+ &regcfg);
if (ret)
return ret;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- *val = regcfg & BIAS_BITS;
+ *val = regcfg & (opd->pd_bit | opd->pu_bit);
break;
case PINCONF_SCHMITT:
- *val = regcfg & SCHMITT_BIT;
+ *val = regcfg & opd->schmitt_bit;
break;
case PINCONF_DRIVE_STRENGTH:
- *val = regcfg & DRIVE_BITS;
+ *val = regcfg & opd->drive_bits;
break;
default:
@@ -1359,14 +1368,18 @@ static int ocelot_pincfg_clrsetbits(struct ocelot_pinctrl *info, u32 regaddr,
u32 val;
int ret;
- ret = regmap_read(info->pincfg, regaddr, &val);
+ ret = regmap_read(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ &val);
if (ret)
return ret;
val &= ~clrbits;
val |= setbits;
- ret = regmap_write(info->pincfg, regaddr, val);
+ ret = regmap_write(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ val);
return ret;
}
@@ -1379,23 +1392,27 @@ static int ocelot_hw_set_value(struct ocelot_pinctrl *info,
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- ret = ocelot_pincfg_clrsetbits(info, pin, BIAS_BITS,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->pd_bit | opd->pu_bit,
val);
break;
case PINCONF_SCHMITT:
- ret = ocelot_pincfg_clrsetbits(info, pin, SCHMITT_BIT,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->schmitt_bit,
val);
break;
case PINCONF_DRIVE_STRENGTH:
if (val <= 3)
ret = ocelot_pincfg_clrsetbits(info, pin,
- DRIVE_BITS, val);
+ opd->drive_bits,
+ val);
else
ret = -EINVAL;
break;
@@ -1425,17 +1442,20 @@ static int ocelot_pinconf_get(struct pinctrl_dev *pctldev,
if (param == PIN_CONFIG_BIAS_DISABLE)
val = (val == 0);
else if (param == PIN_CONFIG_BIAS_PULL_DOWN)
- val = (val & BIAS_PD_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pd_bit);
else /* PIN_CONFIG_BIAS_PULL_UP */
- val = (val & BIAS_PU_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pu_bit);
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+ if (!info->pincfg_data->schmitt_bit)
+ return -EOPNOTSUPP;
+
err = ocelot_hw_get_value(info, pin, PINCONF_SCHMITT, &val);
if (err)
return err;
- val = (val & SCHMITT_BIT ? true : false);
+ val = !!(val & info->pincfg_data->schmitt_bit);
break;
case PIN_CONFIG_DRIVE_STRENGTH:
@@ -1479,6 +1499,7 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
unsigned long *configs, unsigned int num_configs)
{
struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 param, arg, p;
int cfg, err = 0;
@@ -1491,8 +1512,8 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
case PIN_CONFIG_BIAS_PULL_UP:
case PIN_CONFIG_BIAS_PULL_DOWN:
arg = (param == PIN_CONFIG_BIAS_DISABLE) ? 0 :
- (param == PIN_CONFIG_BIAS_PULL_UP) ? BIAS_PU_BIT :
- BIAS_PD_BIT;
+ (param == PIN_CONFIG_BIAS_PULL_UP) ?
+ opd->pu_bit : opd->pd_bit;
err = ocelot_hw_set_value(info, pin, PINCONF_BIAS, arg);
if (err)
@@ -1501,7 +1522,10 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
- arg = arg ? SCHMITT_BIT : 0;
+ if (!opd->schmitt_bit)
+ return -EOPNOTSUPP;
+
+ arg = arg ? opd->schmitt_bit : 0;
err = ocelot_hw_set_value(info, pin, PINCONF_SCHMITT,
arg);
if (err)
@@ -1562,69 +1586,94 @@ static const struct pinctrl_ops ocelot_pctl_ops = {
.dt_free_map = pinconf_generic_dt_free_map,
};
-static struct pinctrl_desc luton_desc = {
- .name = "luton-pinctrl",
- .pins = luton_pins,
- .npins = ARRAY_SIZE(luton_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data luton_desc = {
+ .desc = {
+ .name = "luton-pinctrl",
+ .pins = luton_pins,
+ .npins = ARRAY_SIZE(luton_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc serval_desc = {
- .name = "serval-pinctrl",
- .pins = serval_pins,
- .npins = ARRAY_SIZE(serval_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data serval_desc = {
+ .desc = {
+ .name = "serval-pinctrl",
+ .pins = serval_pins,
+ .npins = ARRAY_SIZE(serval_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc ocelot_desc = {
- .name = "ocelot-pinctrl",
- .pins = ocelot_pins,
- .npins = ARRAY_SIZE(ocelot_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data ocelot_desc = {
+ .desc = {
+ .name = "ocelot-pinctrl",
+ .pins = ocelot_pins,
+ .npins = ARRAY_SIZE(ocelot_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc jaguar2_desc = {
- .name = "jaguar2-pinctrl",
- .pins = jaguar2_pins,
- .npins = ARRAY_SIZE(jaguar2_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data jaguar2_desc = {
+ .desc = {
+ .name = "jaguar2-pinctrl",
+ .pins = jaguar2_pins,
+ .npins = ARRAY_SIZE(jaguar2_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc servalt_desc = {
- .name = "servalt-pinctrl",
- .pins = servalt_pins,
- .npins = ARRAY_SIZE(servalt_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data servalt_desc = {
+ .desc = {
+ .name = "servalt-pinctrl",
+ .pins = servalt_pins,
+ .npins = ARRAY_SIZE(servalt_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc sparx5_desc = {
- .name = "sparx5-pinctrl",
- .pins = sparx5_pins,
- .npins = ARRAY_SIZE(sparx5_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data sparx5_desc = {
+ .desc = {
+ .name = "sparx5-pinctrl",
+ .pins = sparx5_pins,
+ .npins = ARRAY_SIZE(sparx5_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(4),
+ .pu_bit = BIT(3),
+ .drive_bits = GENMASK(1, 0),
+ .schmitt_bit = BIT(2),
+ },
};
-static struct pinctrl_desc lan966x_desc = {
- .name = "lan966x-pinctrl",
- .pins = lan966x_pins,
- .npins = ARRAY_SIZE(lan966x_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &lan966x_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data lan966x_desc = {
+ .desc = {
+ .name = "lan966x-pinctrl",
+ .pins = lan966x_pins,
+ .npins = ARRAY_SIZE(lan966x_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &lan966x_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(3),
+ .pu_bit = BIT(2),
+ .drive_bits = GENMASK(1, 0),
+ },
};
static int ocelot_create_group_func_map(struct device *dev,
@@ -1890,7 +1939,8 @@ static const struct of_device_id ocelot_pinctrl_of_match[] = {
{},
};
-static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
+static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev,
+ const struct ocelot_pinctrl *info)
{
void __iomem *base;
@@ -1898,7 +1948,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
- .max_register = 32,
+ .max_register = info->desc->npins * 4,
.name = "pincfg",
};
@@ -1913,6 +1963,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
static int ocelot_pinctrl_probe(struct platform_device *pdev)
{
+ const struct ocelot_match_data *data;
struct device *dev = &pdev->dev;
struct ocelot_pinctrl *info;
struct reset_control *reset;
@@ -1929,7 +1980,16 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev)
if (!info)
return -ENOMEM;
- info->desc = (struct pinctrl_desc *)device_get_match_data(dev);
+ data = device_get_match_data(dev);
+ if (!data)
+ return -EINVAL;
+
+ info->desc = devm_kmemdup(dev, &data->desc, sizeof(*info->desc),
+ GFP_KERNEL);
+ if (!info->desc)
+ return -ENOMEM;
+
+ info->pincfg_data = &data->pincfg_data;
reset = devm_reset_control_get_optional_shared(dev, "switch");
if (IS_ERR(reset))
@@ -1956,7 +2016,7 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev)
/* Pinconf registers */
if (info->desc->confops) {
- pincfg = ocelot_pinctrl_create_pincfg(pdev);
+ pincfg = ocelot_pinctrl_create_pincfg(pdev, info);
if (IS_ERR(pincfg))
dev_dbg(dev, "Failed to create pincfg regmap\n");
else
diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c
index 63429a287434..770862f45b3f 100644
--- a/drivers/pinctrl/ralink/pinctrl-ralink.c
+++ b/drivers/pinctrl/ralink/pinctrl-ralink.c
@@ -266,6 +266,8 @@ static int ralink_pinctrl_pins(struct ralink_priv *p)
p->func[i]->pin_count,
sizeof(int),
GFP_KERNEL);
+ if (!p->func[i]->pins)
+ return -ENOMEM;
for (j = 0; j < p->func[i]->pin_count; j++)
p->func[i]->pins[j] = p->func[i]->pin_first + j;
diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c
index 3ba47040ac42..2b3335ab56c6 100644
--- a/drivers/pinctrl/sunplus/sppctl.c
+++ b/drivers/pinctrl/sunplus/sppctl.c
@@ -871,6 +871,9 @@ static int sppctl_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node
}
*map = kcalloc(*num_maps + nmG, sizeof(**map), GFP_KERNEL);
+ if (*map == NULL)
+ return -ENOMEM;
+
for (i = 0; i < (*num_maps); i++) {
dt_pin = be32_to_cpu(list[i]);
pin_num = FIELD_GET(GENMASK(31, 24), dt_pin);
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 458218f88c5e..fe4971b65c64 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -176,6 +176,7 @@ config PTP_1588_CLOCK_OCP
depends on !S390
depends on COMMON_CLK
select NET_DEVLINK
+ select CRC16
help
This driver adds support for an OpenCompute time card.
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9e54fe76a9b2..35d4b398c197 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3565,7 +3565,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
if (!atomic_read(&queue->set_pci_flags_count)) {
/*
* there's no outstanding PCI any more, so we
- * have to request a PCI to be sure the the PCI
+ * have to request a PCI to be sure the PCI
* will wake at some time in the future then we
* can flush packed buffers that might still be
* hanging around, which can happen if no
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b519f4b59d30..5e8887fa02c8 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -11386,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev)
_scsih_ir_shutdown(ioc);
_scsih_nvme_shutdown(ioc);
mpt3sas_base_mask_interrupts(ioc);
+ mpt3sas_base_stop_watchdog(ioc);
ioc->shost_recovery = 1;
mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
ioc->shost_recovery = 0;
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index a480c4d589f5..729e309e6034 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -450,7 +450,7 @@ static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode)
goto out_put_request;
ret = 0;
- if (hdr->iovec_count) {
+ if (hdr->iovec_count && hdr->dxfer_len) {
struct iov_iter i;
struct iovec *iov = NULL;
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 775c0bf2f923..0933948d7df3 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr,
struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
/* if an error occurred and we have an active dma, then terminate */
- dmaengine_terminate_sync(ctlr->dma_tx);
- bs->tx_dma_active = false;
- dmaengine_terminate_sync(ctlr->dma_rx);
- bs->rx_dma_active = false;
+ if (ctlr->dma_tx) {
+ dmaengine_terminate_sync(ctlr->dma_tx);
+ bs->tx_dma_active = false;
+ }
+ if (ctlr->dma_rx) {
+ dmaengine_terminate_sync(ctlr->dma_rx);
+ bs->rx_dma_active = false;
+ }
bcm2835_spi_undo_prologue(bs);
/* and reset */
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 31d778e9d255..6a7f7df1e776 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -69,7 +69,7 @@
#define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */
#define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */
#define CDNS_SPI_SS0 0x1 /* Slave Select zero */
-#define CDNS_SPI_NOSS 0x3C /* No Slave select */
+#define CDNS_SPI_NOSS 0xF /* No Slave select */
/*
* SPI Interrupt Registers bit Masks
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 7a014eeec2d0..411b1307b7fd 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -613,6 +613,10 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx,
rspi->dma_callbacked, HZ);
if (ret > 0 && rspi->dma_callbacked) {
ret = 0;
+ if (tx)
+ dmaengine_synchronize(rspi->ctlr->dma_tx);
+ if (rx)
+ dmaengine_synchronize(rspi->ctlr->dma_rx);
} else {
if (!ret) {
dev_err(&rspi->ctlr->dev, "DMA timeout\n");
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index c7b337480e3e..3d367be71728 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2953,37 +2953,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp, int max_timeout)
{
- int err = 0;
- unsigned long time_left;
+ unsigned long time_left = msecs_to_jiffies(max_timeout);
unsigned long flags;
+ bool pending;
+ int err;
+retry:
time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
- msecs_to_jiffies(max_timeout));
+ time_left);
- spin_lock_irqsave(hba->host->host_lock, flags);
- hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
+ /*
+ * The completion handler called complete() and the caller of
+ * this function still owns the @lrbp tag so the code below does
+ * not trigger any race conditions.
+ */
+ hba->dev_cmd.complete = NULL;
err = ufshcd_get_tr_ocs(lrbp);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
- }
- spin_unlock_irqrestore(hba->host->host_lock, flags);
-
- if (!time_left) {
+ } else {
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
- if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
+ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
- /*
- * in case of an error, after clearing the doorbell,
- * we also need to clear the outstanding_request
- * field in hba
- */
- spin_lock_irqsave(&hba->outstanding_lock, flags);
- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ /*
+ * Since clearing the command succeeded we also need to
+ * clear the task tag bit from the outstanding_reqs
+ * variable.
+ */
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ pending = test_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ if (pending) {
+ hba->dev_cmd.complete = NULL;
+ __clear_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ }
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (!pending) {
+ /*
+ * The completion handler ran while we tried to
+ * clear the command.
+ */
+ time_left = 1;
+ goto retry;
+ }
+ } else {
+ dev_err(hba->dev, "%s: failed to clear tag %d\n",
+ __func__, lrbp->task_tag);
+ }
}
return err;
diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
index e7332cc65b1f..173aea8e9997 100644
--- a/drivers/ufs/host/ufshcd-pltfrm.c
+++ b/drivers/ufs/host/ufshcd-pltfrm.c
@@ -108,9 +108,20 @@ out:
return ret;
}
+static bool phandle_exists(const struct device_node *np,
+ const char *phandle_name, int index)
+{
+ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
+
+ if (parse_np)
+ of_node_put(parse_np);
+
+ return parse_np != NULL;
+}
+
#define MAX_PROP_SIZE 32
static int ufshcd_populate_vreg(struct device *dev, const char *name,
- struct ufs_vreg **out_vreg)
+ struct ufs_vreg **out_vreg)
{
char prop_name[MAX_PROP_SIZE];
struct ufs_vreg *vreg = NULL;
@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
}
snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
- if (!of_parse_phandle(np, prop_name, 0)) {
+ if (!phandle_exists(np, prop_name, 0)) {
dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
__func__, prop_name);
goto out;
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 90ce16b6e05f..f422f9c58ba7 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -632,16 +632,19 @@ static int __init sev_guest_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct snp_guest_dev *snp_dev;
struct miscdevice *misc;
+ void __iomem *mapping;
int ret;
if (!dev->platform_data)
return -ENODEV;
data = (struct sev_guest_platform_data *)dev->platform_data;
- layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
- if (!layout)
+ mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
+ if (!mapping)
return -ENODEV;
+ layout = (__force void *)mapping;
+
ret = -ENOMEM;
snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
if (!snp_dev)
@@ -706,7 +709,7 @@ e_free_response:
e_free_request:
free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
e_unmap:
- iounmap(layout);
+ iounmap(mapping);
return ret;
}
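
The change above keeps the __iomem cookie returned by ioremap_encrypted() in its own variable so every error path hands exactly that cookie back to iounmap(), while the force-cast pointer is used only for accessing the data. A minimal sketch of the pattern with hypothetical names (secrets_layout, parse_secrets()) and plain ioremap() standing in:

#include <linux/io.h>
#include <linux/errno.h>
#include <linux/types.h>

struct secrets_layout;					/* hypothetical */
int parse_secrets(struct secrets_layout *layout);	/* hypothetical consumer */

static int map_and_parse(phys_addr_t pa)
{
	void __iomem *mapping;
	struct secrets_layout *layout;
	int ret;

	mapping = ioremap(pa, PAGE_SIZE);
	if (!mapping)
		return -ENODEV;

	layout = (__force void *)mapping;	/* CPU view used for parsing only */
	ret = parse_secrets(layout);

	iounmap(mapping);			/* always unmap with the original cookie */
	return ret;
}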
diff --git a/fs/attr.c b/fs/attr.c
index dbe996b0dedf..b5b8835ddf15 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -22,7 +22,7 @@
* chown_ok - verify permissions to chown inode
* @mnt_userns: user namespace of the mount @inode was found from
* @inode: inode to check permissions on
- * @uid: uid to chown @inode to
+ * @ia_vfsuid: uid to chown @inode to
*
* If the inode has been found through an idmapped mount the user namespace of
* the vfsmount must be passed through @mnt_userns. This function will then
@@ -31,15 +31,15 @@
* performed on the raw inode simply passs init_user_ns.
*/
static bool chown_ok(struct user_namespace *mnt_userns,
- const struct inode *inode,
- kuid_t uid)
+ const struct inode *inode, vfsuid_t ia_vfsuid)
{
- kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, inode->i_uid))
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()) &&
+ vfsuid_eq(ia_vfsuid, vfsuid))
return true;
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (uid_eq(kuid, INVALID_UID) &&
+ if (!vfsuid_valid(vfsuid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
@@ -49,7 +49,7 @@ static bool chown_ok(struct user_namespace *mnt_userns,
* chgrp_ok - verify permissions to chgrp inode
* @mnt_userns: user namespace of the mount @inode was found from
* @inode: inode to check permissions on
- * @gid: gid to chown @inode to
+ * @ia_vfsgid: gid to chown @inode to
*
* If the inode has been found through an idmapped mount the user namespace of
* the vfsmount must be passed through @mnt_userns. This function will then
@@ -58,21 +58,19 @@ static bool chown_ok(struct user_namespace *mnt_userns,
* performed on the raw inode simply passs init_user_ns.
*/
static bool chgrp_ok(struct user_namespace *mnt_userns,
- const struct inode *inode, kgid_t gid)
+ const struct inode *inode, vfsgid_t ia_vfsgid)
{
- kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
- kgid_t mapped_gid;
-
- if (gid_eq(gid, inode->i_gid))
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) {
+ if (vfsgid_eq(ia_vfsgid, vfsgid))
return true;
- mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
- if (in_group_p(mapped_gid))
+ if (vfsgid_in_group_p(ia_vfsgid))
return true;
}
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (gid_eq(kgid, INVALID_GID) &&
+ if (!vfsgid_valid(vfsgid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
@@ -120,28 +118,29 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
goto kill_priv;
/* Make sure a caller can chown. */
- if ((ia_valid & ATTR_UID) && !chown_ok(mnt_userns, inode, attr->ia_uid))
+ if ((ia_valid & ATTR_UID) &&
+ !chown_ok(mnt_userns, inode, attr->ia_vfsuid))
return -EPERM;
/* Make sure caller can chgrp. */
- if ((ia_valid & ATTR_GID) && !chgrp_ok(mnt_userns, inode, attr->ia_gid))
+ if ((ia_valid & ATTR_GID) &&
+ !chgrp_ok(mnt_userns, inode, attr->ia_vfsgid))
return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
- kgid_t mapped_gid;
+ vfsgid_t vfsgid;
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
if (ia_valid & ATTR_GID)
- mapped_gid = mapped_kgid_fs(mnt_userns,
- i_user_ns(inode), attr->ia_gid);
+ vfsgid = attr->ia_vfsgid;
else
- mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
/* Also check the setgid bit! */
- if (!in_group_p(mapped_gid) &&
+ if (!vfsgid_in_group_p(vfsgid) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -219,9 +218,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
* setattr_copy must be called with i_mutex held.
*
* setattr_copy updates the inode's metadata with that specified
- * in attr on idmapped mounts. If file ownership is changed setattr_copy
- * doesn't map ia_uid and ia_gid. It will asssume the caller has already
- * provided the intended values. Necessary permission checks to determine
+ * in attr on idmapped mounts. Necessary permission checks to determine
* whether or not the S_ISGID property needs to be removed are performed with
* the correct idmapped mount permission helpers.
* Noticeably missing is inode size update, which is more complex
@@ -242,10 +239,8 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -254,8 +249,8 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (!in_group_p(kgid) &&
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ if (!vfsgid_in_group_p(vfsgid) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
@@ -306,9 +301,6 @@ EXPORT_SYMBOL(may_setattr);
* retry. Because breaking a delegation may take a long time, the
* caller should drop the i_mutex before doing so.
*
- * If file ownership is changed notify_change() doesn't map ia_uid and
- * ia_gid. It will asssume the caller has already provided the intended values.
- *
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported. Also, passing NULL is fine for callers holding
@@ -397,23 +389,25 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* namespace of the superblock.
*/
if (ia_valid & ATTR_UID &&
- !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+ !vfsuid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ attr->ia_vfsuid))
return -EOVERFLOW;
if (ia_valid & ATTR_GID &&
- !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+ !vfsgid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ attr->ia_vfsgid))
return -EOVERFLOW;
/* Don't allow modifications of files with invalid uids or
* gids unless those uids & gids are being made valid.
*/
if (!(ia_valid & ATTR_UID) &&
- !uid_valid(i_uid_into_mnt(mnt_userns, inode)))
+ !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)))
return -EOVERFLOW;
if (!(ia_valid & ATTR_GID) &&
- !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
+ !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
return -EOVERFLOW;
- error = security_inode_setattr(dentry, attr);
+ error = security_inode_setattr(mnt_userns, dentry, attr);
if (error)
return error;
error = try_break_deleg(inode, delegated_inode);
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index ee92634196a8..1105ce3c80cb 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -9,6 +9,15 @@ menuconfig DLM
A general purpose distributed lock manager for kernel or userspace
applications.
+config DLM_DEPRECATED_API
+ bool "DLM deprecated API"
+ depends on DLM
+ help
+ Enables deprecated DLM timeout features that will be removed in
+ later Linux kernel releases.
+
+ If you are unsure, say N.
+
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 3545fdafc6fb..71dab733cf9a 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -9,7 +9,6 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
- netlink.o \
lowcomms.o \
plock.o \
rcom.o \
@@ -18,5 +17,6 @@ dlm-y := ast.o \
requestqueue.o \
user.o \
util.o
+dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index bfac462dd3e8..19ef136f9e4f 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -255,13 +255,13 @@ void dlm_callback_work(struct work_struct *work)
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
- bastfn(lkb->lkb_astparam, callbacks[i].mode);
trace_dlm_bast(ls, lkb, callbacks[i].mode);
+ bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+ trace_dlm_ast(ls, lkb);
castfn(lkb->lkb_astparam);
- trace_dlm_ast(ls, lkb, lkb->lkb_lksb);
}
}
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 42eee2783756..ac8b62106ce0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -75,8 +75,9 @@ struct dlm_cluster {
unsigned int cl_log_info;
unsigned int cl_protocol;
unsigned int cl_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned int cl_timewarn_cs;
- unsigned int cl_waitwarn_us;
+#endif
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
@@ -102,8 +103,9 @@ enum {
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR_TIMEWARN_CS,
- CLUSTER_ATTR_WAITWARN_US,
+#endif
CLUSTER_ATTR_NEW_RSB_COUNT,
CLUSTER_ATTR_RECOVER_CALLBACKS,
CLUSTER_ATTR_CLUSTER_NAME,
@@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
-CLUSTER_ATTR(waitwarn_us, NULL);
+#endif
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
@@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
[CLUSTER_ATTR_MARK] = &cluster_attr_mark,
+#ifdef CONFIG_DLM_DEPRECATED_API
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
- [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
+#endif
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks,
[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name,
@@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_log_info = dlm_config.ci_log_info;
cl->cl_protocol = dlm_config.ci_protocol;
+#ifdef CONFIG_DLM_DEPRECATED_API
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
- cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+#endif
cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
@@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL DLM_PROTO_TCP
#define DEFAULT_MARK 0
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
-#define DEFAULT_WAITWARN_US 0
+#endif
#define DEFAULT_NEW_RSB_COUNT 128
#define DEFAULT_RECOVER_CALLBACKS 0
#define DEFAULT_CLUSTER_NAME ""
@@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = {
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_mark = DEFAULT_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
- .ci_waitwarn_us = DEFAULT_WAITWARN_US,
+#endif
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
.ci_cluster_name = DEFAULT_CLUSTER_NAME
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index df92b0a07fc6..55c5f2c13ebd 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -37,8 +37,9 @@ struct dlm_config_info {
int ci_log_info;
int ci_protocol;
int ci_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
int ci_timewarn_cs;
- int ci_waitwarn_us;
+#endif
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 776c3ed519f0..8aca8085d24e 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -145,7 +145,9 @@ struct dlm_args {
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned long timeout;
+#endif
};
@@ -203,10 +205,20 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
+/* The least significant 2 bytes are the flags actually exchanged in
+ * messages: all flag bits are transmitted in full, but on the receiving
+ * side only the lower 2 bytes will be set.
+ *
+ * Even the wireshark dlm dissector only evaluates the lower bytes. Note
+ * that the upper bytes may not be used on the wire; we assume they are
+ * for internal use or reserved as long as they are not parsed on the
+ * receiving side.
+ */
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -249,10 +261,12 @@ struct dlm_lkb {
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
- struct list_head lkb_time_list;
ktime_t lkb_timestamp;
- ktime_t lkb_wait_time;
+
+#ifdef CONFIG_DLM_DEPRECATED_API
+ struct list_head lkb_time_list;
unsigned long lkb_timeout_cs;
+#endif
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
@@ -568,8 +582,10 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
+#ifdef CONFIG_DLM_DEPRECATED_API
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
+#endif
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
@@ -606,8 +622,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
- struct completion ls_members_done;
- int ls_members_result;
+ struct completion ls_recovery_done;
+ int ls_recovery_result;
struct miscdevice ls_device;
@@ -688,7 +704,9 @@ struct dlm_ls {
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
+#ifdef CONFIG_DLM_DEPRECATED_API
#define LSFL_TIMEWARN 8
+#endif
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
@@ -741,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls)
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
+#else
+static inline int dlm_netlink_init(void) { return 0; }
+static inline void dlm_netlink_exit(void) { };
+static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { };
+#endif
int dlm_plock_init(void);
void dlm_plock_exit(void);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 226822f49d30..dac7eb75dba9 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -296,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+#ifdef CONFIG_DLM_DEPRECATED_API
/* if the operation was a cancel, then return -DLM_ECANCEL, if a
timeout caused the cancel then return -ETIMEDOUT */
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
rv = -ETIMEDOUT;
}
+#endif
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
@@ -1210,7 +1212,9 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&lkb->lkb_time_list);
+#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
@@ -1306,6 +1310,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
kref_get(&lkb->lkb_ref);
}
+static void unhold_lkb_assert(struct kref *kref)
+{
+ struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
+
+ DLM_ASSERT(false, dlm_print_lkb(lkb););
+}
+
/* This is called when we need to remove a reference and are certain
it's not the last ref. e.g. del_lkb is always called between a
find_lkb/put_lkb and is always the inverse of a previous add_lkb.
@@ -1313,9 +1324,7 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
static inline void unhold_lkb(struct dlm_lkb *lkb)
{
- int rv;
- rv = kref_put(&lkb->lkb_ref, kill_lkb);
- DLM_ASSERT(!rv, dlm_print_lkb(lkb););
+ kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}
static void lkb_add_ordered(struct list_head *new, struct list_head *head,
@@ -1402,75 +1411,6 @@ static int msg_reply_type(int mstype)
return -1;
}
-static int nodeid_warned(int nodeid, int num_nodes, int *warned)
-{
- int i;
-
- for (i = 0; i < num_nodes; i++) {
- if (!warned[i]) {
- warned[i] = nodeid;
- return 0;
- }
- if (warned[i] == nodeid)
- return 1;
- }
- return 0;
-}
-
-void dlm_scan_waiters(struct dlm_ls *ls)
-{
- struct dlm_lkb *lkb;
- s64 us;
- s64 debug_maxus = 0;
- u32 debug_scanned = 0;
- u32 debug_expired = 0;
- int num_nodes = 0;
- int *warned = NULL;
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
-
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (!lkb->lkb_wait_time)
- continue;
-
- debug_scanned++;
-
- us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
-
- if (us < dlm_config.ci_waitwarn_us)
- continue;
-
- lkb->lkb_wait_time = 0;
-
- debug_expired++;
- if (us > debug_maxus)
- debug_maxus = us;
-
- if (!num_nodes) {
- num_nodes = ls->ls_num_nodes;
- warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
- }
- if (!warned)
- continue;
- if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
- continue;
-
- log_error(ls, "waitwarn %x %lld %d us check connection to "
- "node %d", lkb->lkb_id, (long long)us,
- dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
- }
- mutex_unlock(&ls->ls_waiters_mutex);
- kfree(warned);
-
- if (debug_expired)
- log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
- debug_scanned, debug_expired,
- dlm_config.ci_waitwarn_us, (long long)debug_maxus);
-}
-
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
@@ -1514,7 +1454,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
- lkb->lkb_wait_time = ktime_get();
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
@@ -1842,6 +1781,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
@@ -1962,17 +1902,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (ktime_to_us(lkb->lkb_wait_time))
- lkb->lkb_wait_time = ktime_get();
- }
- mutex_unlock(&ls->ls_waiters_mutex);
}
+#else
+static void add_timeout(struct dlm_lkb *lkb) { }
+static void del_timeout(struct dlm_lkb *lkb) { }
+#endif
/* lkb is master or local copy */
@@ -2837,12 +2771,20 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, unsigned long timeout_cs,
void (*ast) (void *astparam),
void *astparam,
void (*bast) (void *astparam, int mode),
struct dlm_args *args)
+#else
+static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
+ int namelen, void (*ast)(void *astparam),
+ void *astparam,
+ void (*bast)(void *astparam, int mode),
+ struct dlm_args *args)
+#endif
{
int rv = -EINVAL;
@@ -2895,7 +2837,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
args->astfn = ast;
args->astparam = astparam;
args->bastfn = bast;
+#ifdef CONFIG_DLM_DEPRECATED_API
args->timeout = timeout_cs;
+#endif
args->mode = mode;
args->lksb = lksb;
rv = 0;
@@ -2951,7 +2895,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
+#ifdef CONFIG_DLM_DEPRECATED_API
lkb->lkb_timeout_cs = args->timeout;
+#endif
rv = 0;
out:
if (rv)
@@ -3472,10 +3418,15 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error)
goto out;
- trace_dlm_lock_start(ls, lkb, mode, flags);
+ trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
+#else
+ error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
+ &args);
+#endif
if (error)
goto out_put;
@@ -3487,7 +3438,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
- trace_dlm_lock_end(ls, lkb, mode, flags, error);
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
if (convert || error)
__put_lkb(ls, lkb);
@@ -5839,9 +5790,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
return 0;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs)
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
+ int mode, uint32_t flags, void *name, unsigned int namelen)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5864,8 +5820,13 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error) {
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
@@ -5904,9 +5865,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
return error;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs)
+#else
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5941,8 +5907,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua->bastaddr = ua_tmp->bastaddr;
ua->user_lksb = ua_tmp->user_lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error)
goto out_put;
@@ -5966,7 +5937,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid)
+ uint32_t *lkid)
{
struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 252a5898f908..a7b6474f009d 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,9 +24,15 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
-void dlm_scan_waiters(struct dlm_ls *ls);
+
+#ifdef CONFIG_DLM_DEPRECATED_API
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
+#else
+static inline void dlm_scan_timeout(struct dlm_ls *ls) { }
+static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
+#endif
+
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
@@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
+ uint32_t flags, void *name, unsigned int namelen);
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+#endif
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid);
+ uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 19ed41a5da93..3972f4d86c75 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -275,7 +275,6 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
- dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
@@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster,
ls->ls_ops_arg = ops_arg;
}
- if (flags & DLM_LSFL_TIMEWARN)
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (flags & DLM_LSFL_TIMEWARN) {
+ pr_warn_once("===============================================================\n"
+ "WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ " Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n"
+ "===============================================================\n");
+
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+ }
/* ls_exflags are forced to match among nodes, and we don't
- need to require all nodes to have some flags set */
+ * need to require all nodes to have some flags set
+ */
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
+#else
+ /* ls_exflags are forced to match among nodes, and we don't
+ * need to require all nodes to have some flags set
+ */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL));
+#endif
size = READ_ONCE(dlm_config.ci_rsbtbl_size);
ls->ls_rsbtbl_size = size;
@@ -527,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster,
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
+#endif
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
@@ -548,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
- init_completion(&ls->ls_members_done);
- ls->ls_members_result = -1;
+ init_completion(&ls->ls_recovery_done);
+ ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
@@ -645,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster,
if (error)
goto out_recoverd;
- wait_for_completion(&ls->ls_members_done);
- error = ls->ls_members_result;
+	/* wait until recovery succeeds or fails */
+ wait_for_completion(&ls->ls_recovery_done);
+ error = ls->ls_recovery_result;
if (error)
goto out_members;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 19e82f08c0e0..a4e84e8d94c8 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -529,7 +529,7 @@ static void lowcomms_write_space(struct sock *sk)
return;
if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
queue_work(send_workqueue, &con->swork);
return;
}
@@ -1931,7 +1931,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
return ret;
if (!test_and_set_bit(CF_CONNECTED, &con->flags))
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
return 0;
}
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 98084e0cfccf..2af2ccfe43a9 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -534,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
- count as a negative change so the "neg" recovery steps will happen */
+ * count as a negative change so the "neg" recovery steps will happen
+ *
+	 * This function must report all member changes to the lsops or
+	 * midcomms layer and must not return before doing so.
+ */
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
@@ -583,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg;
error = ping_members(ls);
- /* error -EINTR means that a new recovery action is triggered.
- * We ignore this recovery action and let run the new one which might
- * have new member configuration.
- */
- if (error == -EINTR)
- error = 0;
-
- /* new_lockspace() may be waiting to know if the config
- * is good or bad
- */
- ls->ls_members_result = error;
- complete(&ls->ls_members_done);
-
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
@@ -675,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
- dlm_lsop_recover_prep(ls);
+	/* call the recover_prep ops only once, not again for every
+	 * dlm_ls_stop() issued while recovery is already stopped.
+	 *
+	 * If we were able to clear the LSFL_RUNNING bit (i.e. it was
+	 * set), we know this is the first dlm_ls_stop() call.
+ */
+ if (new)
+ dlm_lsop_recover_prep(ls);
+
return 0;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 0993eebf2060..737f185aad8d 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -29,6 +29,8 @@ struct plock_async_data {
struct plock_op {
struct list_head list;
int done;
+	/* set if the lock op was interrupted while waiting for dlm_controld's reply */
+ bool sigint;
struct dlm_plock_info info;
/* if set indicates async handling */
struct plock_async_data *data;
@@ -79,8 +81,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
-static void do_unlock_close(struct dlm_ls *ls, u64 number,
- struct file *file, struct file_lock *fl)
+static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
@@ -89,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
- op->info.fsid = ls->ls_global_id;
- op->info.number = number;
+ op->info.pid = info->pid;
+ op->info.fsid = info->fsid;
+ op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
- if (fl->fl_lmops && fl->fl_lmops->lm_grant)
- op->info.owner = (__u64) fl->fl_pid;
- else
- op->info.owner = (__u64)(long) fl->fl_owner;
+ op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
@@ -161,16 +159,24 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = wait_event_interruptible(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
spin_lock(&ops_lock);
- list_del(&op->list);
+		/* recheck under ops_lock whether op->done became nonzero;
+		 * if so, this interruption should be ignored
+ */
+ if (op->done != 0) {
+ spin_unlock(&ops_lock);
+ goto do_lock_wait;
+ }
+
+ op->sigint = true;
spin_unlock(&ops_lock);
- log_print("%s: wait interrupted %x %llx, op removed",
+ log_debug(ls, "%s: wait interrupted %x %llx pid %d",
__func__, ls->ls_global_id,
- (unsigned long long)number);
- dlm_release_plock_op(op);
- do_unlock_close(ls, number, file, fl);
+ (unsigned long long)number, op->info.pid);
goto out;
}
+do_lock_wait:
+
WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
@@ -378,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
- op = list_entry(send_list.next, struct plock_op, list);
+ op = list_first_entry(&send_list, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
@@ -425,6 +431,19 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (iter->info.fsid == info.fsid &&
iter->info.number == info.number &&
iter->info.owner == info.owner) {
+ if (iter->sigint) {
+ list_del(&iter->list);
+ spin_unlock(&ops_lock);
+
+ pr_debug("%s: sigint cleanup %x %llx pid %d",
+ __func__, iter->info.fsid,
+ (unsigned long long)iter->info.number,
+ iter->info.pid);
+ do_unlock_close(&iter->info);
+ memcpy(&iter->info, &info, sizeof(info));
+ dlm_release_plock_op(iter);
+ return count;
+ }
list_del_init(&iter->list);
memcpy(&iter->info, &info, sizeof(info));
if (iter->data)
@@ -443,7 +462,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
else
wake_up(&recv_wq);
} else
- log_print("%s: no op %x %llx - may got interrupted?", __func__,
+ log_print("%s: no op %x %llx", __func__,
info.fsid, (unsigned long long)info.number);
return count;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index a55dfce705dd..e15eb511b04b 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
/*
* Add or remove nodes from the lockspace's ls_nodes list.
+ *
+	 * Because we must report all membership changes to the lsops or
+	 * midcomms layer, it is not permitted to abort ls_recover() until
+	 * this has been done.
*/
error = dlm_recover_members(ls, rv, &neg);
@@ -239,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
- dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
- log_rinfo(ls, "dlm_recover %llu error %d",
- (unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
+
return error;
}
@@ -257,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
+ int error;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
@@ -266,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_unlock(&ls->ls_recover_lock);
if (rv) {
- ls_recover(ls, rv);
+ error = ls_recover(ls, rv);
+ switch (error) {
+ case 0:
+ ls->ls_recovery_result = 0;
+ complete(&ls->ls_recovery_done);
+
+ dlm_lsop_recover_done(ls);
+ break;
+ case -EINTR:
+		/* recovery was interrupted (-EINTR); wait for the next
+		 * ls_recover() iteration, which will hopefully succeed.
+ */
+ log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
+ __func__, (unsigned long long)rv->seq);
+ break;
+ default:
+ log_rinfo(ls, "%s %llu error %d", __func__,
+ (unsigned long long)rv->seq, error);
+
+			/* let new_lockspace() know about the critical error */
+ ls->ls_recovery_result = error;
+ complete(&ls->ls_recovery_done);
+ break;
+ }
+
kfree(rv->nodes);
kfree(rv);
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 1060b24f18d4..99e8f0744513 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -250,6 +250,14 @@ static int device_user_lock(struct dlm_user_proc *proc,
goto out;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (params->timeout)
+ pr_warn_once("========================================================\n"
+ "WARNING: the lkb timeout feature is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ "========================================================\n");
+#endif
+
ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
if (!ua)
goto out;
@@ -262,23 +270,34 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT) {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_convert(ls, ua,
+ params->mode, params->flags,
+ params->lkid, params->lvb);
+#endif
} else if (params->flags & DLM_LKF_ORPHAN) {
error = dlm_user_adopt_orphan(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
- (unsigned long) params->timeout,
&lkid);
if (!error)
error = lkid;
} else {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_request(ls, ua,
+ params->mode, params->flags,
+ params->name, params->namelen);
+#endif
if (!error)
error = ua->lksb.sb_lkid;
}
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 19e6c56a9f47..26fa170090b8 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -17,7 +17,7 @@ struct z_erofs_decompress_req {
/* indicate the algorithm will be used for decompression */
unsigned int alg;
- bool inplace_io, partial_decoding;
+ bool inplace_io, partial_decoding, fillgaps;
};
struct z_erofs_decompressor {
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fbb037ba326e..fe8ac0e163f7 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -366,42 +366,33 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
-static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
- loff_t align = iocb->ki_pos | iov_iter_count(to) |
- iov_iter_alignment(to);
- struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int blksize_mask;
-
- if (bdev)
- blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
- else
- blksize_mask = (1 << inode->i_blkbits) - 1;
- if (align & blksize_mask)
- return -EINVAL;
- return 0;
-}
-
-static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
/* no need taking (shared) inode lock since it's a ro filesystem */
if (!iov_iter_count(to))
return 0;
#ifdef CONFIG_FS_DAX
- if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ if (IS_DAX(inode))
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
if (iocb->ki_flags & IOCB_DIRECT) {
- int err = erofs_prepare_dio(iocb, to);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ unsigned int blksize_mask;
+
+ if (bdev)
+ blksize_mask = bdev_logical_block_size(bdev) - 1;
+ else
+ blksize_mask = (1 << inode->i_blkbits) - 1;
+
+ if ((iocb->ki_pos | iov_iter_count(to) |
+ iov_iter_alignment(to)) & blksize_mask)
+ return -EINVAL;
- if (!err)
- return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
- NULL, 0, NULL, 0);
- if (err < 0)
- return err;
+ return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
+ NULL, 0, NULL, 0);
}
return filemap_read(iocb, to, 0);
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 6dca1900c733..2d55569f96ac 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -83,7 +83,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
j = 0;
/* 'valid' bounced can only be tested after a complete round */
- if (test_bit(j, bounced)) {
+ if (!rq->fillgaps && test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
@@ -91,14 +91,18 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
if (page) {
__clear_bit(j, bounced);
- if (kaddr) {
- if (kaddr + PAGE_SIZE == page_address(page))
+ if (!PageHighMem(page)) {
+ if (!i) {
+ kaddr = page_address(page);
+ continue;
+ }
+ if (kaddr &&
+ kaddr + PAGE_SIZE == page_address(page)) {
kaddr += PAGE_SIZE;
- else
- kaddr = NULL;
- } else if (!i) {
- kaddr = page_address(page);
+ continue;
+ }
}
+ kaddr = NULL;
continue;
}
kaddr = NULL;
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 05a3063cf2bc..5e59b3f523eb 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -143,6 +143,7 @@ again:
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);
+ wake_up_all(&z_erofs_lzma_wq);
z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 18e59821c597..ecf28f66b97d 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -22,10 +22,9 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name,
}
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
- void *dentry_blk, unsigned int *ofs,
+ void *dentry_blk, struct erofs_dirent *de,
unsigned int nameoff, unsigned int maxsize)
{
- struct erofs_dirent *de = dentry_blk + *ofs;
const struct erofs_dirent *end = dentry_blk + nameoff;
while (de < end) {
@@ -59,9 +58,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
/* stopped by some reason */
return 1;
++de;
- *ofs += sizeof(struct erofs_dirent);
+ ctx->pos += sizeof(struct erofs_dirent);
}
- *ofs = maxsize;
return 0;
}
@@ -90,33 +88,33 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= PAGE_SIZE) {
+ nameoff >= EROFS_BLKSIZ) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
- goto skip_this;
+ break;
}
maxsize = min_t(unsigned int,
- dirsize - ctx->pos + ofs, PAGE_SIZE);
+ dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
/* search dirents at the arbitrary position */
if (initial) {
initial = false;
ofs = roundup(ofs, sizeof(struct erofs_dirent));
+ ctx->pos = blknr_to_addr(i) + ofs;
if (ofs >= nameoff)
goto skip_this;
}
- err = erofs_fill_dentries(dir, ctx, de, &ofs,
+ err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs,
nameoff, maxsize);
-skip_this:
- ctx->pos = blknr_to_addr(i) + ofs;
-
if (err)
break;
+skip_this:
+ ctx->pos = blknr_to_addr(i) + maxsize;
++i;
ofs = 0;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 724bb57075f6..5792ca9e0d5e 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022 Alibaba Cloud
*/
#include "zdata.h"
#include "compress.h"
@@ -26,6 +27,82 @@ static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
};
+struct z_erofs_bvec_iter {
+ struct page *bvpage;
+ struct z_erofs_bvset *bvset;
+ unsigned int nr, cur;
+};
+
+static struct page *z_erofs_bvec_iter_end(struct z_erofs_bvec_iter *iter)
+{
+ if (iter->bvpage)
+ kunmap_local(iter->bvset);
+ return iter->bvpage;
+}
+
+static struct page *z_erofs_bvset_flip(struct z_erofs_bvec_iter *iter)
+{
+ unsigned long base = (unsigned long)((struct z_erofs_bvset *)0)->bvec;
+ /* have to access nextpage in advance, otherwise it will be unmapped */
+ struct page *nextpage = iter->bvset->nextpage;
+ struct page *oldpage;
+
+ DBG_BUGON(!nextpage);
+ oldpage = z_erofs_bvec_iter_end(iter);
+ iter->bvpage = nextpage;
+ iter->bvset = kmap_local_page(nextpage);
+ iter->nr = (PAGE_SIZE - base) / sizeof(struct z_erofs_bvec);
+ iter->cur = 0;
+ return oldpage;
+}
+
+static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvset_inline *bvset,
+ unsigned int bootstrap_nr,
+ unsigned int cur)
+{
+ *iter = (struct z_erofs_bvec_iter) {
+ .nr = bootstrap_nr,
+ .bvset = (struct z_erofs_bvset *)bvset,
+ };
+
+ while (cur > iter->nr) {
+ cur -= iter->nr;
+ z_erofs_bvset_flip(iter);
+ }
+ iter->cur = cur;
+}
+
+static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvec *bvec,
+ struct page **candidate_bvpage)
+{
+ if (iter->cur == iter->nr) {
+ if (!*candidate_bvpage)
+ return -EAGAIN;
+
+ DBG_BUGON(iter->bvset->nextpage);
+ iter->bvset->nextpage = *candidate_bvpage;
+ z_erofs_bvset_flip(iter);
+
+ iter->bvset->nextpage = NULL;
+ *candidate_bvpage = NULL;
+ }
+ iter->bvset->bvec[iter->cur++] = *bvec;
+ return 0;
+}
+
+static void z_erofs_bvec_dequeue(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvec *bvec,
+ struct page **old_bvpage)
+{
+ if (iter->cur == iter->nr)
+ *old_bvpage = z_erofs_bvset_flip(iter);
+ else
+ *old_bvpage = NULL;
+ *bvec = iter->bvset->bvec[iter->cur++];
+}
+
static void z_erofs_destroy_pcluster_pool(void)
{
int i;
@@ -46,7 +123,7 @@ static int z_erofs_create_pcluster_pool(void)
for (pcs = pcluster_pool;
pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
- size = struct_size(a, compressed_pages, pcs->maxpages);
+ size = struct_size(a, compressed_bvecs, pcs->maxpages);
sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
pcs->slab = kmem_cache_create(pcs->name, size, 0,
@@ -150,30 +227,29 @@ int __init z_erofs_init_zip_subsystem(void)
return err;
}
-enum z_erofs_collectmode {
- COLLECT_SECONDARY,
- COLLECT_PRIMARY,
+enum z_erofs_pclustermode {
+ Z_EROFS_PCLUSTER_INFLIGHT,
/*
- * The current collection was the tail of an exist chain, in addition
- * that the previous processed chained collections are all decided to
+ * The current pcluster was the tail of an existing chain, and the
+ * previously processed chained pclusters have all been decided to
* be hooked up to it.
- * A new chain will be created for the remaining collections which are
- * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
- * the next collection cannot reuse the whole page safely in
- * the following scenario:
+ * A new chain will be created for the remaining pclusters which are
+ * not processed yet, so different from Z_EROFS_PCLUSTER_FOLLOWED,
+ * the next pcluster cannot reuse the whole page safely for inplace I/O
+ * in the following scenario:
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
- * | (belongs to the next cl) | (belongs to the current cl) |
- * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+ * | (belongs to the next pcl) | (belongs to the current pcl) |
+ * |_______PCLUSTER_FOLLOWED______|________PCLUSTER_HOOKED__________|
*/
- COLLECT_PRIMARY_HOOKED,
+ Z_EROFS_PCLUSTER_HOOKED,
/*
- * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it
+ * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
* could be dispatched into bypass queue later due to uptodated managed
* pages. All related online pages cannot be reused for inplace I/O (or
- * pagevec) since it can be directly decoded without I/O submission.
+ * bvpage) since it can be directly decoded without I/O submission.
*/
- COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
+ Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
* The current collection has been linked with the owned chain, and
* could also be linked with the remaining collections, which means
@@ -184,39 +260,36 @@ enum z_erofs_collectmode {
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
* | (of the current cl) | (of the previous collection) |
- * | PRIMARY_FOLLOWED or | |
- * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
+ * | PCLUSTER_FOLLOWED or | |
+ * |_____PCLUSTER_HOOKED__|___________PCLUSTER_FOLLOWED____________|
*
* [ (*) the above page can be used as inplace I/O. ]
*/
- COLLECT_PRIMARY_FOLLOWED,
+ Z_EROFS_PCLUSTER_FOLLOWED,
};
struct z_erofs_decompress_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
+ struct z_erofs_bvec_iter biter;
- struct z_erofs_pagevec_ctor vector;
-
+ struct page *candidate_bvpage;
struct z_erofs_pcluster *pcl, *tailpcl;
- /* a pointer used to pick up inplace I/O pages */
- struct page **icpage_ptr;
z_erofs_next_pcluster_t owned_head;
-
- enum z_erofs_collectmode mode;
+ enum z_erofs_pclustermode mode;
bool readahead;
/* used for applying cache strategy on the fly */
bool backmost;
erofs_off_t headoffset;
+
+ /* a pointer used to pick up inplace I/O pages */
+ unsigned int icur;
};
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = COLLECT_PRIMARY_FOLLOWED, .backmost = true }
-
-static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
-static DEFINE_MUTEX(z_pagemap_global_lock);
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
enum z_erofs_cache_alloctype type,
@@ -231,24 +304,21 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
*/
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
- struct page **pages;
- pgoff_t index;
+ unsigned int i;
- if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
+ if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
- pages = pcl->compressed_pages;
- index = pcl->obj.index;
- for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) {
+ for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page;
compressed_page_t t;
struct page *newpage = NULL;
/* the compressed page was loaded before */
- if (READ_ONCE(*pages))
+ if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- page = find_get_page(mc, index);
+ page = find_get_page(mc, pcl->obj.index + i);
if (page) {
t = tag_compressed_page_justfound(page);
@@ -269,7 +339,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
}
}
- if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
+ if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL,
+ tagptr_cast_ptr(t)))
continue;
if (page)
@@ -283,7 +354,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
* managed cache since it can be moved to the bypass queue instead.
*/
if (standalone)
- fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
@@ -300,7 +371,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pcl->pclusterpages; ++i) {
- struct page *page = pcl->compressed_pages[i];
+ struct page *page = pcl->compressed_bvecs[i].page;
if (!page)
continue;
@@ -313,7 +384,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
continue;
/* barrier is implied in the following 'unlock_page' */
- WRITE_ONCE(pcl->compressed_pages[i], NULL);
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
detach_page_private(page);
unlock_page(page);
}
@@ -323,56 +394,59 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int erofs_try_to_free_cached_page(struct page *page)
{
struct z_erofs_pcluster *const pcl = (void *)page_private(page);
- int ret = 0; /* 0 - busy */
+ int ret, i;
- if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
- unsigned int i;
+ if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
+ return 0;
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pcl->pclusterpages; ++i) {
- if (pcl->compressed_pages[i] == page) {
- WRITE_ONCE(pcl->compressed_pages[i], NULL);
- ret = 1;
- break;
- }
+ ret = 0;
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ for (i = 0; i < pcl->pclusterpages; ++i) {
+ if (pcl->compressed_bvecs[i].page == page) {
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
+ ret = 1;
+ break;
}
- erofs_workgroup_unfreeze(&pcl->obj, 1);
-
- if (ret)
- detach_page_private(page);
}
+ erofs_workgroup_unfreeze(&pcl->obj, 1);
+ if (ret)
+ detach_page_private(page);
return ret;
}
-/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
- struct page *page)
+ struct z_erofs_bvec *bvec)
{
struct z_erofs_pcluster *const pcl = fe->pcl;
- while (fe->icpage_ptr > pcl->compressed_pages)
- if (!cmpxchg(--fe->icpage_ptr, NULL, page))
+ while (fe->icur > 0) {
+ if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
+ NULL, bvec->page)) {
+ pcl->compressed_bvecs[fe->icur] = *bvec;
return true;
+ }
+ }
return false;
}
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
- struct page *page, enum z_erofs_page_type type,
- bool pvec_safereuse)
+ struct z_erofs_bvec *bvec, bool exclusive)
{
int ret;
- /* give priority for inplaceio */
- if (fe->mode >= COLLECT_PRIMARY &&
- type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- z_erofs_try_inplace_io(fe, page))
- return 0;
-
- ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
- pvec_safereuse);
- fe->pcl->vcnt += (unsigned int)ret;
- return ret ? 0 : -EAGAIN;
+ if (exclusive) {
+		/* give priority to inplace I/O, using file pages first */
+ if (z_erofs_try_inplace_io(fe, bvec))
+ return 0;
+ /* otherwise, check if it can be used as a bvpage */
+ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
+ !fe->candidate_bvpage)
+ fe->candidate_bvpage = bvec->page;
+ }
+ ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage);
+ fe->pcl->vcnt += (ret >= 0);
+ return ret;
}
static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
@@ -385,7 +459,7 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* so we can attach this pcluster to our submission chain. */
- f->mode = COLLECT_PRIMARY_FOLLOWED;
+ f->mode = Z_EROFS_PCLUSTER_FOLLOWED;
return;
}
@@ -393,66 +467,21 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
* type 2, link to the end of an existing open chain, be careful
* that its submission is controlled by the original attached chain.
*/
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ if (*owned_head != &pcl->next && pcl != f->tailpcl &&
+ cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- f->mode = COLLECT_PRIMARY_HOOKED;
+ f->mode = Z_EROFS_PCLUSTER_HOOKED;
f->tailpcl = NULL;
return;
}
/* type 3, it belongs to a chain, but it isn't the end of the chain */
- f->mode = COLLECT_PRIMARY;
+ f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
}
-static int z_erofs_lookup_pcluster(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
-{
- struct z_erofs_pcluster *pcl = fe->pcl;
- unsigned int length;
-
- /* to avoid unexpected loop formed by corrupted images */
- if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- if (pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- length = READ_ONCE(pcl->length);
- if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
- if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- } else {
- unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
-
- if (map->m_flags & EROFS_MAP_FULL_MAPPED)
- llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
-
- while (llen > length &&
- length != cmpxchg_relaxed(&pcl->length, length, llen)) {
- cpu_relax();
- length = READ_ONCE(pcl->length);
- }
- }
- mutex_lock(&pcl->lock);
- /* used to check tail merging loop due to corrupted images */
- if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
- fe->tailpcl = pcl;
-
- z_erofs_try_to_claim_pcluster(fe);
- return 0;
-}
-
-static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
+static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
{
+ struct erofs_map_blocks *map = &fe->map;
bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct erofs_workgroup *grp;
@@ -471,14 +500,13 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
atomic_set(&pcl->obj.refcount, 1);
pcl->algorithmformat = map->m_algorithmformat;
- pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
- (map->m_flags & EROFS_MAP_FULL_MAPPED ?
- Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
+ pcl->length = 0;
+ pcl->partial = true;
/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = fe->owned_head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
- fe->mode = COLLECT_PRIMARY_FOLLOWED;
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
/*
* lock all primary followed works before visible to others
@@ -494,7 +522,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
} else {
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
- grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
if (IS_ERR(grp)) {
err = PTR_ERR(grp);
goto err_out;
@@ -520,11 +548,10 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
{
- struct erofs_workgroup *grp;
+ struct erofs_map_blocks *map = &fe->map;
+ struct erofs_workgroup *grp = NULL;
int ret;
DBG_BUGON(fe->pcl);
@@ -533,38 +560,35 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
- if (map->m_flags & EROFS_MAP_META) {
- if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- goto tailpacking;
+ if (!(map->m_flags & EROFS_MAP_META)) {
+ grp = erofs_find_workgroup(fe->inode->i_sb,
+ map->m_pa >> PAGE_SHIFT);
+ } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
- grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ ret = -EEXIST;
} else {
-tailpacking:
- ret = z_erofs_register_pcluster(fe, inode, map);
- if (!ret)
- goto out;
- if (ret != -EEXIST)
- return ret;
+ ret = z_erofs_register_pcluster(fe);
}
- ret = z_erofs_lookup_pcluster(fe, inode, map);
- if (ret) {
- erofs_workgroup_put(&fe->pcl->obj);
+ if (ret == -EEXIST) {
+ mutex_lock(&fe->pcl->lock);
+ /* used to check tail merging loop due to corrupted images */
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = fe->pcl;
+
+ z_erofs_try_to_claim_pcluster(fe);
+ } else if (ret) {
return ret;
}
-
-out:
- z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
- fe->pcl->pagevec, fe->pcl->vcnt);
+ z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
+ Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
/* since file-backed online pages are traversed in reverse order */
- fe->icpage_ptr = fe->pcl->compressed_pages +
- z_erofs_pclusterpages(fe->pcl);
+ fe->icur = z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -593,14 +617,19 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
if (!pcl)
return false;
- z_erofs_pagevec_ctor_exit(&fe->vector, false);
+ z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
+ if (fe->candidate_bvpage) {
+ DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage));
+ fe->candidate_bvpage = NULL;
+ }
+
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
- if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+ if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
erofs_workgroup_put(&pcl->obj);
fe->pcl = NULL;
@@ -628,11 +657,10 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
- bool tight = true;
+ bool tight = true, exclusive;
enum z_erofs_cache_alloctype cache_strategy;
- enum z_erofs_page_type page_type;
- unsigned int cur, end, spiltted, index;
+ unsigned int cur, end, spiltted;
int err = 0;
/* register locked file pages as online pages in pack */
@@ -653,7 +681,7 @@ repeat:
map->m_llen = 0;
err = z_erofs_map_blocks_iter(inode, map, 0);
if (err)
- goto err_out;
+ goto out;
} else {
if (fe->pcl)
goto hitted;
@@ -663,9 +691,9 @@ repeat:
if (!(map->m_flags & EROFS_MAP_MAPPED))
goto hitted;
- err = z_erofs_collector_begin(fe, inode, map);
+ err = z_erofs_collector_begin(fe);
if (err)
- goto err_out;
+ goto out;
if (z_erofs_is_inline_pcluster(fe->pcl)) {
void *mp;
@@ -676,11 +704,12 @@ repeat:
err = PTR_ERR(mp);
erofs_err(inode->i_sb,
"failed to get inline page, err %d", err);
- goto err_out;
+ goto out;
}
get_page(fe->map.buf.page);
- WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
- fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
+ fe->map.buf.page);
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
} else {
/* bind cache first when cached decompression is preferred */
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
@@ -696,10 +725,10 @@ hitted:
* Ensure the current partial page belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since
* those chains are handled asynchronously thus the page cannot be used
- * for inplace I/O or pagevec (should be processed in strict order.)
+ * for inplace I/O or bvpage (should be processed in a strict order.)
*/
- tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
- fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+ tight &= (fe->mode >= Z_EROFS_PCLUSTER_HOOKED &&
+ fe->mode != Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -707,60 +736,59 @@ hitted:
goto next_part;
}
- /* let's derive page type */
- page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
- (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
- (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
- Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
-
+ exclusive = (!cur && (!spiltted || tight));
if (cur)
- tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
+ tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
retry:
- err = z_erofs_attach_page(fe, page, page_type,
- fe->mode >= COLLECT_PRIMARY_FOLLOWED);
- /* should allocate an additional short-lived page for pagevec */
- if (err == -EAGAIN) {
- struct page *const newpage =
- alloc_page(GFP_NOFS | __GFP_NOFAIL);
-
- set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
- err = z_erofs_attach_page(fe, newpage,
- Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
- if (!err)
- goto retry;
+ err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
+ .page = page,
+ .offset = offset - map->m_la,
+ .end = end,
+ }), exclusive);
+ /* should allocate an additional short-lived page for bvset */
+ if (err == -EAGAIN && !fe->candidate_bvpage) {
+ fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL);
+ set_page_private(fe->candidate_bvpage,
+ Z_EROFS_SHORTLIVED_PAGE);
+ goto retry;
}
- if (err)
- goto err_out;
-
- index = page->index - (map->m_la >> PAGE_SHIFT);
-
- z_erofs_onlinepage_fixup(page, index, true);
+ if (err) {
+ DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage);
+ goto out;
+ }
+ z_erofs_onlinepage_split(page);
/* bump up the number of spiltted parts of a page */
++spiltted;
- /* also update nr_pages */
- fe->pcl->nr_pages = max_t(pgoff_t, fe->pcl->nr_pages, index + 1);
+ if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
+ fe->pcl->multibases = true;
+
+ if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+ fe->pcl->length == map->m_llen)
+ fe->pcl->partial = false;
+ if (fe->pcl->length < offset + end - map->m_la) {
+ fe->pcl->length = offset + end - map->m_la;
+ fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+ }
next_part:
- /* can be used for verification */
+ /* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
+ map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
end = cur;
if (end > 0)
goto repeat;
out:
+ if (err)
+ z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
__func__, page, spiltted, map->m_llen);
return err;
-
- /* if some error occurred while processing this page */
-err_out:
- SetPageError(page);
- goto out;
}
static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
@@ -783,97 +811,137 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
-static int z_erofs_decompress_pcluster(struct super_block *sb,
- struct z_erofs_pcluster *pcl,
- struct page **pagepool)
-{
- struct erofs_sb_info *const sbi = EROFS_SB(sb);
- unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
- struct z_erofs_pagevec_ctor ctor;
- unsigned int i, inputsize, outputsize, llen, nr_pages;
- struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
- struct page **pages, **compressed_pages, *page;
+struct z_erofs_decompress_backend {
+ struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
+ struct super_block *sb;
+ struct z_erofs_pcluster *pcl;
- enum z_erofs_page_type page_type;
- bool overlapped, partial;
- int err;
+ /* pages with the longest decompressed length for deduplication */
+ struct page **decompressed_pages;
+ /* pages to keep the compressed data */
+ struct page **compressed_pages;
- might_sleep();
- DBG_BUGON(!READ_ONCE(pcl->nr_pages));
+ struct list_head decompressed_secondary_bvecs;
+ struct page **pagepool;
+ unsigned int onstack_used, nr_pages;
+};
- mutex_lock(&pcl->lock);
- nr_pages = pcl->nr_pages;
+struct z_erofs_bvec_item {
+ struct z_erofs_bvec bvec;
+ struct list_head list;
+};
- if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
- pages = pages_onstack;
- } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
- mutex_trylock(&z_pagemap_global_lock)) {
- pages = z_pagemap_global;
- } else {
- gfp_t gfp_flags = GFP_KERNEL;
+static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+ struct z_erofs_bvec *bvec)
+{
+ struct z_erofs_bvec_item *item;
- if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
- gfp_flags |= __GFP_NOFAIL;
+ if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
+ unsigned int pgnr;
+ struct page *oldpage;
- pages = kvmalloc_array(nr_pages, sizeof(struct page *),
- gfp_flags);
+ pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
+ DBG_BUGON(pgnr >= be->nr_pages);
+ oldpage = be->decompressed_pages[pgnr];
+ be->decompressed_pages[pgnr] = bvec->page;
- /* fallback to global pagemap for the lowmem scenario */
- if (!pages) {
- mutex_lock(&z_pagemap_global_lock);
- pages = z_pagemap_global;
- }
+ if (!oldpage)
+ return;
}
- for (i = 0; i < nr_pages; ++i)
- pages[i] = NULL;
-
- err = 0;
- z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
- pcl->pagevec, 0);
-
- for (i = 0; i < pcl->vcnt; ++i) {
- unsigned int pagenr;
+ /* (cold path) one pcluster is requested multiple times */
+ item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_NOFAIL);
+ item->bvec = *bvec;
+ list_add(&item->list, &be->decompressed_secondary_bvecs);
+}
- page = z_erofs_pagevec_dequeue(&ctor, &page_type);
+static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
+ int err)
+{
+ unsigned int off0 = be->pcl->pageofs_out;
+ struct list_head *p, *n;
+
+ list_for_each_safe(p, n, &be->decompressed_secondary_bvecs) {
+ struct z_erofs_bvec_item *bvi;
+ unsigned int end, cur;
+ void *dst, *src;
+
+ bvi = container_of(p, struct z_erofs_bvec_item, list);
+ cur = bvi->bvec.offset < 0 ? -bvi->bvec.offset : 0;
+ end = min_t(unsigned int, be->pcl->length - bvi->bvec.offset,
+ bvi->bvec.end);
+ dst = kmap_local_page(bvi->bvec.page);
+ while (cur < end) {
+ unsigned int pgnr, scur, len;
+
+ pgnr = (bvi->bvec.offset + cur + off0) >> PAGE_SHIFT;
+ DBG_BUGON(pgnr >= be->nr_pages);
+
+ scur = bvi->bvec.offset + cur -
+ ((pgnr << PAGE_SHIFT) - off0);
+ len = min_t(unsigned int, end - cur, PAGE_SIZE - scur);
+ if (!be->decompressed_pages[pgnr]) {
+ err = -EFSCORRUPTED;
+ cur += len;
+ continue;
+ }
+ src = kmap_local_page(be->decompressed_pages[pgnr]);
+ memcpy(dst + cur, src + scur, len);
+ kunmap_local(src);
+ cur += len;
+ }
+ kunmap_local(dst);
+ if (err)
+ z_erofs_page_mark_eio(bvi->bvec.page);
+ z_erofs_onlinepage_endio(bvi->bvec.page);
+ list_del(p);
+ kfree(bvi);
+ }
+}
- /* all pages in pagevec ought to be valid */
- DBG_BUGON(!page);
- DBG_BUGON(z_erofs_page_is_invalidated(page));
+static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
+{
+ struct z_erofs_pcluster *pcl = be->pcl;
+ struct z_erofs_bvec_iter biter;
+ struct page *old_bvpage;
+ int i;
- if (z_erofs_put_shortlivedpage(pagepool, page))
- continue;
+ z_erofs_bvec_iter_begin(&biter, &pcl->bvset, Z_EROFS_INLINE_BVECS, 0);
+ for (i = 0; i < pcl->vcnt; ++i) {
+ struct z_erofs_bvec bvec;
- if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
- pagenr = 0;
- else
- pagenr = z_erofs_onlinepage_index(page);
+ z_erofs_bvec_dequeue(&biter, &bvec, &old_bvpage);
- DBG_BUGON(pagenr >= nr_pages);
+ if (old_bvpage)
+ z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
- /*
- * currently EROFS doesn't support multiref(dedup),
- * so here erroring out one multiref page.
- */
- if (pages[pagenr]) {
- DBG_BUGON(1);
- SetPageError(pages[pagenr]);
- z_erofs_onlinepage_endio(pages[pagenr]);
- err = -EFSCORRUPTED;
- }
- pages[pagenr] = page;
+ DBG_BUGON(z_erofs_page_is_invalidated(bvec.page));
+ z_erofs_do_decompressed_bvec(be, &bvec);
}
- z_erofs_pagevec_ctor_exit(&ctor, true);
- overlapped = false;
- compressed_pages = pcl->compressed_pages;
+ old_bvpage = z_erofs_bvec_iter_end(&biter);
+ if (old_bvpage)
+ z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
+}
+static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
+ bool *overlapped)
+{
+ struct z_erofs_pcluster *pcl = be->pcl;
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ int i, err = 0;
+
+ *overlapped = false;
for (i = 0; i < pclusterpages; ++i) {
- unsigned int pagenr;
+ struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
+ struct page *page = bvec->page;
- page = compressed_pages[i];
- /* all compressed pages ought to be valid */
- DBG_BUGON(!page);
+ /* compressed pages ought to be present before decompressing */
+ if (!page) {
+ DBG_BUGON(1);
+ continue;
+ }
+ be->compressed_pages[i] = page;
if (z_erofs_is_inline_pcluster(pcl)) {
if (!PageUptodate(page))
@@ -883,109 +951,129 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- if (erofs_page_is_managed(sbi, page)) {
+ if (erofs_page_is_managed(EROFS_SB(be->sb), page)) {
if (!PageUptodate(page))
err = -EIO;
continue;
}
+ z_erofs_do_decompressed_bvec(be, bvec);
+ *overlapped = true;
+ }
+ }
- /*
- * only if non-head page can be selected
- * for inplace decompression
- */
- pagenr = z_erofs_onlinepage_index(page);
-
- DBG_BUGON(pagenr >= nr_pages);
- if (pages[pagenr]) {
- DBG_BUGON(1);
- SetPageError(pages[pagenr]);
- z_erofs_onlinepage_endio(pages[pagenr]);
- err = -EFSCORRUPTED;
- }
- pages[pagenr] = page;
+ if (err)
+ return err;
+ return 0;
+}
- overlapped = true;
- }
+static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
+ int err)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
+ struct z_erofs_pcluster *pcl = be->pcl;
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ unsigned int i, inputsize;
+ int err2;
+ struct page *page;
+ bool overlapped;
- /* PG_error needs checking for all non-managed pages */
- if (PageError(page)) {
- DBG_BUGON(PageUptodate(page));
- err = -EIO;
- }
+ mutex_lock(&pcl->lock);
+ be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
+
+ /* allocate (de)compressed page arrays if cannot be kept on stack */
+ be->decompressed_pages = NULL;
+ be->compressed_pages = NULL;
+ be->onstack_used = 0;
+ if (be->nr_pages <= Z_EROFS_ONSTACK_PAGES) {
+ be->decompressed_pages = be->onstack_pages;
+ be->onstack_used = be->nr_pages;
+ memset(be->decompressed_pages, 0,
+ sizeof(struct page *) * be->nr_pages);
}
+ if (pclusterpages + be->onstack_used <= Z_EROFS_ONSTACK_PAGES)
+ be->compressed_pages = be->onstack_pages + be->onstack_used;
+
+ if (!be->decompressed_pages)
+ be->decompressed_pages =
+ kvcalloc(be->nr_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
+ if (!be->compressed_pages)
+ be->compressed_pages =
+ kvcalloc(pclusterpages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
+
+ z_erofs_parse_out_bvecs(be);
+ err2 = z_erofs_parse_in_bvecs(be, &overlapped);
+ if (err2)
+ err = err2;
if (err)
goto out;
- llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
- if (nr_pages << PAGE_SHIFT >= pcl->pageofs_out + llen) {
- outputsize = llen;
- partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
- } else {
- outputsize = (nr_pages << PAGE_SHIFT) - pcl->pageofs_out;
- partial = true;
- }
-
if (z_erofs_is_inline_pcluster(pcl))
inputsize = pcl->tailpacking_size;
else
inputsize = pclusterpages * PAGE_SIZE;
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
- .sb = sb,
- .in = compressed_pages,
- .out = pages,
+ .sb = be->sb,
+ .in = be->compressed_pages,
+ .out = be->decompressed_pages,
.pageofs_in = pcl->pageofs_in,
.pageofs_out = pcl->pageofs_out,
.inputsize = inputsize,
- .outputsize = outputsize,
+ .outputsize = pcl->length,
.alg = pcl->algorithmformat,
.inplace_io = overlapped,
- .partial_decoding = partial
- }, pagepool);
+ .partial_decoding = pcl->partial,
+ .fillgaps = pcl->multibases,
+ }, be->pagepool);
out:
/* must handle all compressed pages before actual file pages */
if (z_erofs_is_inline_pcluster(pcl)) {
- page = compressed_pages[0];
- WRITE_ONCE(compressed_pages[0], NULL);
+ page = pcl->compressed_bvecs[0].page;
+ WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
put_page(page);
} else {
for (i = 0; i < pclusterpages; ++i) {
- page = compressed_pages[i];
+ page = pcl->compressed_bvecs[i].page;
if (erofs_page_is_managed(sbi, page))
continue;
/* recycle all individual short-lived pages */
- (void)z_erofs_put_shortlivedpage(pagepool, page);
- WRITE_ONCE(compressed_pages[i], NULL);
+ (void)z_erofs_put_shortlivedpage(be->pagepool, page);
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
}
}
+ if (be->compressed_pages < be->onstack_pages ||
+ be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
+ kvfree(be->compressed_pages);
+ z_erofs_fill_other_copies(be, err);
- for (i = 0; i < nr_pages; ++i) {
- page = pages[i];
+ for (i = 0; i < be->nr_pages; ++i) {
+ page = be->decompressed_pages[i];
if (!page)
continue;
DBG_BUGON(z_erofs_page_is_invalidated(page));
/* recycle all individual short-lived pages */
- if (z_erofs_put_shortlivedpage(pagepool, page))
+ if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
-
- if (err < 0)
- SetPageError(page);
-
+ if (err)
+ z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
}
- if (pages == z_pagemap_global)
- mutex_unlock(&z_pagemap_global_lock);
- else if (pages != pages_onstack)
- kvfree(pages);
+ if (be->decompressed_pages != be->onstack_pages)
+ kvfree(be->decompressed_pages);
- pcl->nr_pages = 0;
+ pcl->length = 0;
+ pcl->partial = true;
+ pcl->multibases = false;
+ pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
@@ -997,22 +1085,25 @@ out:
static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
+ struct z_erofs_decompress_backend be = {
+ .sb = io->sb,
+ .pagepool = pagepool,
+ .decompressed_secondary_bvecs =
+ LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
+ };
z_erofs_next_pcluster_t owned = io->head;
while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
- struct z_erofs_pcluster *pcl;
-
- /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
+ /* impossible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
-
- /* no possible that 'owned' equals NULL */
+ /* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
- pcl = container_of(owned, struct z_erofs_pcluster, next);
- owned = READ_ONCE(pcl->next);
+ be.pcl = container_of(owned, struct z_erofs_pcluster, next);
+ owned = READ_ONCE(be.pcl->next);
- z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
- erofs_workgroup_put(&pcl->obj);
+ z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
+ erofs_workgroup_put(&be.pcl->obj);
}
}
@@ -1038,7 +1129,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (sync) {
if (!atomic_add_return(bios, &io->pending_bios))
complete(&io->u.done);
-
return;
}
@@ -1071,7 +1161,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
int justfound;
repeat:
- page = READ_ONCE(pcl->compressed_pages[nr]);
+ page = READ_ONCE(pcl->compressed_bvecs[nr].page);
oldpage = page;
if (!page)
@@ -1087,7 +1177,7 @@ repeat:
* otherwise, it will go inplace I/O path instead.
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
- WRITE_ONCE(pcl->compressed_pages[nr], page);
+ WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
set_page_private(page, 0);
tocache = true;
goto out_tocache;
@@ -1113,14 +1203,13 @@ repeat:
/* the page is still in manage cache */
if (page->mapping == mc) {
- WRITE_ONCE(pcl->compressed_pages[nr], page);
+ WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
- ClearPageError(page);
if (!PagePrivate(page)) {
/*
* impossible to be !PagePrivate(page) for
* the current restriction as well if
- * the page is already in compressed_pages[].
+ * the page is already in compressed_bvecs[].
*/
DBG_BUGON(!justfound);
@@ -1149,7 +1238,8 @@ repeat:
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
- if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
+ if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
+ oldpage, page)) {
erofs_pagepool_add(pagepool, page);
cond_resched();
goto repeat;
@@ -1186,6 +1276,7 @@ fg_out:
q = fgq;
init_completion(&fgq->u.done);
atomic_set(&fgq->pending_bios, 0);
+ q->eio = false;
}
q->sb = sb;
q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
@@ -1246,26 +1337,25 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
DBG_BUGON(PageUptodate(page));
DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (err)
- SetPageError(page);
-
if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
if (!err)
SetPageUptodate(page);
unlock_page(page);
}
}
+ if (err)
+ q->eio = true;
z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
bio_put(bio);
}
-static void z_erofs_submit_queue(struct super_block *sb,
- struct z_erofs_decompress_frontend *f,
+static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct page **pagepool,
struct z_erofs_decompressqueue *fgq,
bool *force_fg)
{
- struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ struct super_block *sb = f->inode->i_sb;
+ struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
@@ -1317,7 +1407,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
struct page *page;
page = pickup_page_for_submission(pcl, i++, pagepool,
- MNGD_MAPPING(sbi));
+ mc);
if (!page)
continue;
@@ -1369,15 +1459,14 @@ submit_bio_retry:
z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
}
-static void z_erofs_runqueue(struct super_block *sb,
- struct z_erofs_decompress_frontend *f,
+static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
struct page **pagepool, bool force_fg)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
- z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
+ z_erofs_submit_queue(f, pagepool, io, &force_fg);
/* handle bypass queue (no i/o pclusters) immediately */
z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
@@ -1475,7 +1564,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
(void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
- z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+ z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, 0));
if (err)
@@ -1524,7 +1613,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
(void)z_erofs_collector_end(&f);
- z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+ z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, nr_pages));
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&pagepool);
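
The zdata.c changes above replace the old pagevec enqueue with bvec slots claimed from the tail of compressed_bvecs[]: z_erofs_try_inplace_io() walks the slot array backwards and takes a slot with cmpxchg() so a slot already filled by the cached-page side is never reused. A minimal user-space sketch of that claim loop follows, assuming GCC/Clang __sync builtins; struct slot and try_claim_inplace() are illustrative names, not the kernel code.

#include <stdbool.h>
#include <stddef.h>

struct slot {
	void *page;
};

static bool try_claim_inplace(struct slot *slots, unsigned int *icur,
			      void *page)
{
	while (*icur > 0) {
		/* claim the slot only if it is still empty */
		if (__sync_bool_compare_and_swap(&slots[--(*icur)].page,
						 NULL, page))
			return true;
	}
	return false;	/* all slots already taken, fall back to the bvec queue */
}
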
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 58053bb5066f..e7f04c4fbb81 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -7,13 +7,10 @@
#define __EROFS_FS_ZDATA_H
#include "internal.h"
-#include "zpvec.h"
+#include "tagptr.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
-#define Z_EROFS_NR_INLINE_PAGEVECS 3
-
-#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001
-#define Z_EROFS_PCLUSTER_LENGTH_BIT 1
+#define Z_EROFS_INLINE_BVECS 2
/*
* let's leave a type here in case of introducing
@@ -21,6 +18,21 @@
*/
typedef void *z_erofs_next_pcluster_t;
+struct z_erofs_bvec {
+ struct page *page;
+ int offset;
+ unsigned int end;
+};
+
+#define __Z_EROFS_BVSET(name, total) \
+struct name { \
+ /* point to the next page which contains the following bvecs */ \
+ struct page *nextpage; \
+ struct z_erofs_bvec bvec[total]; \
+}
+__Z_EROFS_BVSET(z_erofs_bvset,);
+__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -38,24 +50,21 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
- /* A: lower limit of decompressed length and if full length or not */
+ /* L: the maximum decompression size of this round */
unsigned int length;
+ /* L: total number of bvecs */
+ unsigned int vcnt;
+
/* I: page offset of start position of decompression */
unsigned short pageofs_out;
/* I: page offset of inline compressed data */
unsigned short pageofs_in;
- /* L: maximum relative page index in pagevec[] */
- unsigned short nr_pages;
-
- /* L: total number of pages in pagevec[] */
- unsigned int vcnt;
-
union {
- /* L: inline a certain number of pagevecs for bootstrap */
- erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
+ /* L: inline a certain number of bvec for bootstrap */
+ struct z_erofs_bvset_inline bvset;
/* I: can be used to free the pcluster by RCU. */
struct rcu_head rcu;
@@ -72,8 +81,14 @@ struct z_erofs_pcluster {
/* I: compression algorithm format */
unsigned char algorithmformat;
- /* A: compressed pages (can be cached or inplaced pages) */
- struct page *compressed_pages[];
+ /* L: whether partial decompression or not */
+ bool partial;
+
+ /* L: indicate several pageofs_outs or not */
+ bool multibases;
+
+ /* A: compressed bvecs (can be cached or inplaced pages) */
+ struct z_erofs_bvec compressed_bvecs[];
};
/* let's avoid the valid 32-bit kernel addresses */
@@ -94,6 +109,8 @@ struct z_erofs_decompressqueue {
struct completion done;
struct work_struct work;
} u;
+
+ bool eio;
};
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
@@ -108,38 +125,17 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return pcl->pclusterpages;
}
-#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
-#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
-#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
-
/*
- * waiters (aka. ongoing_packs): # to unlock the page
- * sub-index: 0 - for partial page, >= 1 full page sub-index
+ * bit 31: I/O error occurred on this page
+ * bit 0 - 30: remaining parts to complete this page
*/
-typedef atomic_t z_erofs_onlinepage_t;
-
-/* type punning */
-union z_erofs_onlinepage_converter {
- z_erofs_onlinepage_t *o;
- unsigned long *v;
-};
-
-static inline unsigned int z_erofs_onlinepage_index(struct page *page)
-{
- union z_erofs_onlinepage_converter u;
-
- DBG_BUGON(!PagePrivate(page));
- u.v = &page_private(page);
-
- return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-}
+#define Z_EROFS_PAGE_EIO (1 << 31)
static inline void z_erofs_onlinepage_init(struct page *page)
{
union {
- z_erofs_onlinepage_t o;
+ atomic_t o;
unsigned long v;
- /* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
@@ -147,49 +143,36 @@ static inline void z_erofs_onlinepage_init(struct page *page)
SetPagePrivate(page);
}
-static inline void z_erofs_onlinepage_fixup(struct page *page,
- uintptr_t index, bool down)
+static inline void z_erofs_onlinepage_split(struct page *page)
{
- union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
- int orig, orig_index, val;
-
-repeat:
- orig = atomic_read(u.o);
- orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
- if (orig_index) {
- if (!index)
- return;
+ atomic_inc((atomic_t *)&page->private);
+}
- DBG_BUGON(orig_index != index);
- }
+static inline void z_erofs_page_mark_eio(struct page *page)
+{
+ int orig;
- val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
- ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
- if (atomic_cmpxchg(u.o, orig, val) != orig)
- goto repeat;
+ do {
+ orig = atomic_read((atomic_t *)&page->private);
+ } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
+ orig | Z_EROFS_PAGE_EIO) != orig);
}
static inline void z_erofs_onlinepage_endio(struct page *page)
{
- union z_erofs_onlinepage_converter u;
unsigned int v;
DBG_BUGON(!PagePrivate(page));
- u.v = &page_private(page);
-
- v = atomic_dec_return(u.o);
- if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+ v = atomic_dec_return((atomic_t *)&page->private);
+ if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
- if (!PageError(page))
+ if (!(v & Z_EROFS_PAGE_EIO))
SetPageUptodate(page);
unlock_page(page);
}
- erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
-#define Z_EROFS_VMAP_ONSTACK_PAGES \
- min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
-#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
+#define Z_EROFS_ONSTACK_PAGES 32
#endif
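
zdata.h now folds the error state into the per-page atomic counter instead of relying on PG_error: bit 31 records an I/O error while bits 0-30 count the outstanding sub-parts of the page. The following is a user-space sketch of those semantics, assuming C11 atomics; the helper names mirror z_erofs_onlinepage_*() in spirit only.

#include <stdatomic.h>
#include <stdbool.h>

#define PAGE_EIO	(1u << 31)

static void onlinepage_split(atomic_uint *v)
{
	atomic_fetch_add(v, 1);			/* one more pending sub-part */
}

static void onlinepage_mark_eio(atomic_uint *v)
{
	atomic_fetch_or(v, PAGE_EIO);		/* record error, keep the count intact */
}

/* returns true when the last sub-part completes without any error seen */
static bool onlinepage_endio(atomic_uint *v)
{
	unsigned int left = atomic_fetch_sub(v, 1) - 1;

	if (left & ~PAGE_EIO)
		return false;			/* other sub-parts still pending */
	return !(left & PAGE_EIO);		/* uptodate only if no EIO bit set */
}
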
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
deleted file mode 100644
index b05464f4a808..000000000000
--- a/fs/erofs/zpvec.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2018 HUAWEI, Inc.
- * https://www.huawei.com/
- */
-#ifndef __EROFS_FS_ZPVEC_H
-#define __EROFS_FS_ZPVEC_H
-
-#include "tagptr.h"
-
-/* page type in pagevec for decompress subsystem */
-enum z_erofs_page_type {
- /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
- Z_EROFS_PAGE_TYPE_EXCLUSIVE,
-
- Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
-
- Z_EROFS_VLE_PAGE_TYPE_HEAD,
- Z_EROFS_VLE_PAGE_TYPE_MAX
-};
-
-extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
- __bad_page_type_exclusive(void);
-
-/* pagevec tagged pointer */
-typedef tagptr2_t erofs_vtptr_t;
-
-/* pagevec collector */
-struct z_erofs_pagevec_ctor {
- struct page *curr, *next;
- erofs_vtptr_t *pages;
-
- unsigned int nr, index;
-};
-
-static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
- bool atomic)
-{
- if (!ctor->curr)
- return;
-
- if (atomic)
- kunmap_atomic(ctor->pages);
- else
- kunmap(ctor->curr);
-}
-
-static inline struct page *
-z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
- unsigned int nr)
-{
- unsigned int index;
-
- /* keep away from occupied pages */
- if (ctor->next)
- return ctor->next;
-
- for (index = 0; index < nr; ++index) {
- const erofs_vtptr_t t = ctor->pages[index];
- const unsigned int tags = tagptr_unfold_tags(t);
-
- if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
- return tagptr_unfold_ptr(t);
- }
- DBG_BUGON(nr >= ctor->nr);
- return NULL;
-}
-
-static inline void
-z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
- bool atomic)
-{
- struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
-
- z_erofs_pagevec_ctor_exit(ctor, atomic);
-
- ctor->curr = next;
- ctor->next = NULL;
- ctor->pages = atomic ?
- kmap_atomic(ctor->curr) : kmap(ctor->curr);
-
- ctor->nr = PAGE_SIZE / sizeof(struct page *);
- ctor->index = 0;
-}
-
-static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
- unsigned int nr,
- erofs_vtptr_t *pages,
- unsigned int i)
-{
- ctor->nr = nr;
- ctor->curr = ctor->next = NULL;
- ctor->pages = pages;
-
- if (i >= nr) {
- i -= nr;
- z_erofs_pagevec_ctor_pagedown(ctor, false);
- while (i > ctor->nr) {
- i -= ctor->nr;
- z_erofs_pagevec_ctor_pagedown(ctor, false);
- }
- }
- ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
- ctor->index = i;
-}
-
-static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
- struct page *page,
- enum z_erofs_page_type type,
- bool pvec_safereuse)
-{
- if (!ctor->next) {
- /* some pages cannot be reused as pvec safely without I/O */
- if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
- type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
-
- if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- ctor->index + 1 == ctor->nr)
- return false;
- }
-
- if (ctor->index >= ctor->nr)
- z_erofs_pagevec_ctor_pagedown(ctor, false);
-
- /* exclusive page type must be 0 */
- if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
- __bad_page_type_exclusive();
-
- /* should remind that collector->next never equal to 1, 2 */
- if (type == (uintptr_t)ctor->next) {
- ctor->next = page;
- }
- ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
- return true;
-}
-
-static inline struct page *
-z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
- enum z_erofs_page_type *type)
-{
- erofs_vtptr_t t;
-
- if (ctor->index >= ctor->nr) {
- DBG_BUGON(!ctor->next);
- z_erofs_pagevec_ctor_pagedown(ctor, true);
- }
-
- t = ctor->pages[ctor->index];
-
- *type = tagptr_unfold_tags(t);
-
- /* should remind that collector->next never equal to 1, 2 */
- if (*type == (uintptr_t)ctor->next)
- ctor->next = tagptr_unfold_ptr(t);
-
- ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
- return tagptr_unfold_ptr(t);
-}
-#endif
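
The deleted pagevec relied on tagptr2_t: since struct page pointers are at least 4-byte aligned, the two low bits are free to carry the page type. A generic user-space sketch of that folding trick, with illustrative names rather than the erofs tagptr API:

#include <stdint.h>

typedef uintptr_t vtptr_t;

static vtptr_t vtptr_fold(void *ptr, unsigned int tag)
{
	return (uintptr_t)ptr | (tag & 0x3);	/* stash two tag bits in the pointer */
}

static void *vtptr_unfold_ptr(vtptr_t t)
{
	return (void *)(t & ~(uintptr_t)0x3);
}

static unsigned int vtptr_unfold_tags(vtptr_t t)
{
	return (unsigned int)(t & 0x3);
}
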
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e6b932219803..7a192e4e7fa9 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1679,14 +1679,14 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(inode, iattr)) {
+ if (is_quota_modification(mnt_userns, inode, iattr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- error = dquot_transfer(inode, iattr);
+ if (i_uid_needs_update(mnt_userns, iattr, inode) ||
+ i_gid_needs_update(mnt_userns, iattr, inode)) {
+ error = dquot_transfer(mnt_userns, inode, iattr);
if (error)
return error;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f6a19f6d9f6d..6f475d2e3b18 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1059,9 +1059,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_frags_per_group);
goto failed_mount;
}
- if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
- "error: #inodes per group too big: %lu",
+ "error: invalid #inodes per group: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -1071,6 +1072,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
+ if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
+ le32_to_cpu(es->s_inodes_count),
+ (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
+ goto failed_mount;
+ }
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc_array(db_count,
@@ -1490,8 +1498,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
@@ -1529,8 +1536,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
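
The ext2_fill_super() hunks above add two sanity checks against corrupted images: s_inodes_per_group must be at least one inode per block, and groups_count * inodes_per_group must match the superblock's s_inodes_count. A hedged sketch of the overflow-safe comparison, with simplified stand-in types:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative check only; field names are simplified stand-ins. */
static bool ext2_inodes_count_ok(uint32_t groups_count,
				 uint32_t inodes_per_group,
				 uint32_t sb_inodes_count)
{
	/* widen before multiplying so a corrupted image cannot wrap the product */
	return (uint64_t)groups_count * inodes_per_group == sb_inodes_count;
}
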
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 84c0eb55071d..3dcc1dd1f179 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5350,14 +5350,14 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
- (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
+ if (i_uid_needs_update(mnt_userns, attr, inode) ||
+ i_gid_needs_update(mnt_userns, attr, inode)) {
handle_t *handle;
/* (user+group)*(old+new) structure, inode write (sb,
@@ -5374,7 +5374,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* counts xattr inode references.
*/
down_read(&EXT4_I(inode)->xattr_sem);
- error = dquot_transfer(inode, attr);
+ error = dquot_transfer(mnt_userns, inode, attr);
up_read(&EXT4_I(inode)->xattr_sem);
if (error) {
@@ -5383,10 +5383,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* Update corresponding info in inode so that everything is in
* one transaction */
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
error = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
if (unlikely(error)) {
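
ext2, ext4 and f2fs above switch from open-coded uid/gid comparisons to the i_uid_needs_update()/i_gid_needs_update()/i_uid_update() helpers that take the mount's user namespace. The sketch below only illustrates the intent, that a quota transfer or chown is needed when a supplied, already-mapped owner differs from the inode's current one; the types and bodies are simplified stand-ins, not the VFS implementation.

#include <stdbool.h>

struct simple_iattr {
	unsigned int valid;		/* ATTR_UID / ATTR_GID style bits */
	unsigned int vfsuid, vfsgid;	/* already mapped into the mount namespace */
};

#define SIMPLE_ATTR_UID	0x1
#define SIMPLE_ATTR_GID	0x2

struct simple_inode {
	unsigned int vfsuid, vfsgid;
};

static bool uid_needs_update(const struct simple_iattr *a,
			     const struct simple_inode *i)
{
	return (a->valid & SIMPLE_ATTR_UID) && a->vfsuid != i->vfsuid;
}

static bool gid_needs_update(const struct simple_iattr *a,
			     const struct simple_inode *i)
{
	return (a->valid & SIMPLE_ATTR_GID) && a->vfsgid != i->vfsgid;
}
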
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bd14cef1b08f..d66e37d80a2d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -861,10 +861,8 @@ static void __setattr_copy(struct user_namespace *mnt_userns,
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -917,17 +915,15 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (err)
return err;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
return err;
}
- if ((attr->ia_valid & ATTR_UID &&
- !uid_eq(attr->ia_uid, inode->i_uid)) ||
- (attr->ia_valid & ATTR_GID &&
- !gid_eq(attr->ia_gid, inode->i_gid))) {
+ if (i_uid_needs_update(mnt_userns, attr, inode) ||
+ i_gid_needs_update(mnt_userns, attr, inode)) {
f2fs_lock_op(F2FS_I_SB(inode));
- err = dquot_transfer(inode, attr);
+ err = dquot_transfer(mnt_userns, inode, attr);
if (err) {
set_sbi_flag(F2FS_I_SB(inode),
SBI_QUOTA_NEED_REPAIR);
@@ -938,10 +934,8 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* update uid/gid under lock_op(), so that dquot and inode can
* be updated atomically.
*/
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_unlock_op(F2FS_I_SB(inode));
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 3cb7f8a43b4d..dcd0a1e35095 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -255,18 +255,18 @@ static int recover_quota_data(struct inode *inode, struct page *page)
memset(&attr, 0, sizeof(attr));
- attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
- attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+ attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
+ attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
- if (!uid_eq(attr.ia_uid, inode->i_uid))
+ if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&init_user_ns, inode)))
attr.ia_valid |= ATTR_UID;
- if (!gid_eq(attr.ia_gid, inode->i_gid))
+ if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&init_user_ns, inode)))
attr.ia_valid |= ATTR_GID;
if (!attr.ia_valid)
return 0;
- err = dquot_transfer(inode, &attr);
+ err = dquot_transfer(&init_user_ns, inode, &attr);
if (err)
set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3dae3ed60f3a..3e4eb3467cb4 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -90,7 +90,8 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
* out the RO attribute for checking by the security
* module, just because it maps to a file mode.
*/
- err = security_inode_setattr(file->f_path.dentry, &ia);
+ err = security_inode_setattr(file_mnt_user_ns(file),
+ file->f_path.dentry, &ia);
if (err)
goto out_unlock_inode;
@@ -516,9 +517,11 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
if (((attr->ia_valid & ATTR_UID) &&
- (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
+ (!uid_eq(from_vfsuid(mnt_userns, i_user_ns(inode), attr->ia_vfsuid),
+ sbi->options.fs_uid))) ||
((attr->ia_valid & ATTR_GID) &&
- (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
+ (!gid_eq(from_vfsgid(mnt_userns, i_user_ns(inode), attr->ia_vfsgid),
+ sbi->options.fs_gid))) ||
((attr->ia_valid & ATTR_MODE) &&
(attr->ia_mode & ~FAT_VALID_MODE)))
error = -EPERM;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a01ea49f3017..e8e769be9ed0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1738,6 +1738,14 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
return;
/*
+ * READV uses fields in `struct io_rw` (len/addr) to stash the selected
+ * buffer data. However if that buffer is recycled the original request
+ * data stored in addr is lost. Therefore forbid recycling for now.
+ */
+ if (req->opcode == IORING_OP_READV)
+ return;
+
+ /*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
* If the tail has already been incremented, hang on to it.
@@ -12931,7 +12939,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg;
- struct io_buffer_list *bl;
+ struct io_buffer_list *bl, *free_bl = NULL;
struct page **pages;
int nr_pages;
@@ -12963,7 +12971,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
return -EEXIST;
} else {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
}
@@ -12972,7 +12980,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
struct_size(br, bufs, reg.ring_entries),
&nr_pages);
if (IS_ERR(pages)) {
- kfree(bl);
+ kfree(free_bl);
return PTR_ERR(pages);
}
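
The io_register_pbuf_ring() hunk above fixes the error path so that only a buffer list allocated within this call is freed, never one looked up from the context. A self-contained sketch of that ownership pattern, with hypothetical helpers lookup_buf_list() and setup_ring() standing in for the real lookup and mapping steps:

#include <stdlib.h>

struct buf_list { int bgid; };

/* hypothetical stand-ins for the real lookup/mapping steps */
extern struct buf_list *lookup_buf_list(void *ctx, int bgid);
extern int setup_ring(struct buf_list *bl);

static int register_ring(void *ctx, int bgid)
{
	struct buf_list *bl, *free_bl = NULL;

	bl = lookup_buf_list(ctx, bgid);	/* may return an existing list */
	if (!bl) {
		free_bl = bl = calloc(1, sizeof(*bl));	/* this one is ours to free */
		if (!bl)
			return -1;
	}
	if (setup_ring(bl) < 0) {
		free(free_bl);	/* NULL for found lists, so they are left untouched */
		return -1;
	}
	return 0;
}
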
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 1d732fd223d4..332dc9ac47a9 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -95,14 +95,14 @@ int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (rc)
return rc;
- if (is_quota_modification(inode, iattr)) {
+ if (is_quota_modification(mnt_userns, inode, iattr)) {
rc = dquot_initialize(inode);
if (rc)
return rc;
}
if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
(iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- rc = dquot_transfer(inode, iattr);
+ rc = dquot_transfer(mnt_userns, inode, iattr);
if (rc)
return rc;
}
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 05efcdf7a4a7..7c849024999f 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -963,7 +963,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
*/
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags)
+ void *attr_value, size_t attr_size, int flags)
{
int err;
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 8c37aaf936ab..70da4c0ba7ad 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -109,7 +109,7 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
int attr_name_len);
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags);
+ void *attr_value, size_t attr_size, int flags);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
diff --git a/fs/locks.c b/fs/locks.c
index ca28e0e50e56..c266cfdc3291 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -425,21 +425,9 @@ static inline int flock_translate_cmd(int cmd) {
}
/* Fill in a file_lock structure with an appropriate FLOCK lock. */
-static struct file_lock *
-flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
+static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
{
- int type = flock_translate_cmd(cmd);
-
- if (type < 0)
- return ERR_PTR(type);
-
- if (fl == NULL) {
- fl = locks_alloc_lock();
- if (fl == NULL)
- return ERR_PTR(-ENOMEM);
- } else {
- locks_init_lock(fl);
- }
+ locks_init_lock(fl);
fl->fl_file = filp;
fl->fl_owner = filp;
@@ -447,8 +435,6 @@ flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
-
- return fl;
}
static int assign_type(struct file_lock *fl, long type)
@@ -2097,21 +2083,9 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
*/
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
- struct fd f = fdget(fd);
- struct file_lock *lock;
- int can_sleep, unlock;
- int error;
-
- error = -EBADF;
- if (!f.file)
- goto out;
-
- can_sleep = !(cmd & LOCK_NB);
- cmd &= ~LOCK_NB;
- unlock = (cmd == LOCK_UN);
-
- if (!unlock && !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
- goto out_putf;
+ int can_sleep, error, type;
+ struct file_lock fl;
+ struct fd f;
/*
* LOCK_MAND locks were broken for a long time in that they never
@@ -2123,36 +2097,41 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
if (cmd & LOCK_MAND) {
pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
- error = 0;
- goto out_putf;
+ return 0;
}
- lock = flock_make_lock(f.file, cmd, NULL);
- if (IS_ERR(lock)) {
- error = PTR_ERR(lock);
+ type = flock_translate_cmd(cmd & ~LOCK_NB);
+ if (type < 0)
+ return type;
+
+ error = -EBADF;
+ f = fdget(fd);
+ if (!f.file)
+ return error;
+
+ if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
goto out_putf;
- }
- if (can_sleep)
- lock->fl_flags |= FL_SLEEP;
+ flock_make_lock(f.file, &fl, type);
- error = security_file_lock(f.file, lock->fl_type);
+ error = security_file_lock(f.file, fl.fl_type);
if (error)
- goto out_free;
+ goto out_putf;
+
+ can_sleep = !(cmd & LOCK_NB);
+ if (can_sleep)
+ fl.fl_flags |= FL_SLEEP;
if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
- (can_sleep) ? F_SETLKW : F_SETLK,
- lock);
+ (can_sleep) ? F_SETLKW : F_SETLK,
+ &fl);
else
- error = locks_lock_file_wait(f.file, lock);
-
- out_free:
- locks_free_lock(lock);
+ error = locks_lock_file_wait(f.file, &fl);
out_putf:
fdput(f);
- out:
+
return error;
}
@@ -2614,7 +2593,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
if (list_empty(&flctx->flc_flock))
return;
- flock_make_lock(filp, LOCK_UN, &fl);
+ flock_make_lock(filp, &fl, F_UNLCK);
fl.fl_flags |= FL_CLOSE;
if (filp->f_op->flock)
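
flock(2) above now translates the command once, up front, and fills a stack-allocated file_lock instead of heap-allocating one. A user-space sketch of that translation step, using only the standard LOCK_*/F_* constants; it mirrors the mapping flock_translate_cmd() provides, with the LOCK_NB strip done by the caller in the patch:

#include <errno.h>
#include <fcntl.h>
#include <sys/file.h>

static int translate_flock_cmd(int cmd)
{
	switch (cmd & ~LOCK_NB) {	/* strip the non-blocking bit first */
	case LOCK_SH:
		return F_RDLCK;
	case LOCK_EX:
		return F_WRLCK;
	case LOCK_UN:
		return F_UNLCK;
	}
	return -EINVAL;			/* unknown command, reject early */
}
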
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 4f897e109547..cd7d09a569ff 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -295,12 +295,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
const void *data, int data_type,
struct inode *dir)
{
- __u32 marks_mask = 0, marks_ignored_mask = 0;
+ __u32 marks_mask = 0, marks_ignore_mask = 0;
__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
FANOTIFY_EVENT_FLAGS;
const struct path *path = fsnotify_data_path(data, data_type);
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
struct fsnotify_mark *mark;
+ bool ondir = event_mask & FAN_ONDIR;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
@@ -315,19 +316,21 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
return 0;
} else if (!(fid_mode & FAN_REPORT_FID)) {
/* Do we have a directory inode to report? */
- if (!dir && !(event_mask & FS_ISDIR))
+ if (!dir && !ondir)
return 0;
}
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
- /* Apply ignore mask regardless of mark's ISDIR flag */
- marks_ignored_mask |= mark->ignored_mask;
+ /*
+ * Apply ignore mask depending on event flags in ignore mask.
+ */
+ marks_ignore_mask |=
+ fsnotify_effective_ignore_mask(mark, ondir, type);
/*
- * If the event is on dir and this mark doesn't care about
- * events on dir, don't send it!
+ * Send the event depending on event flags in mark mask.
*/
- if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
+ if (!fsnotify_mask_applicable(mark->mask, ondir, type))
continue;
marks_mask |= mark->mask;
@@ -336,7 +339,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
*match_mask |= 1U << type;
}
- test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+ test_mask = event_mask & marks_mask & ~marks_ignore_mask;
/*
* For dirent modification events (create/delete/move) that do not carry
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 80e0ec95b113..1d9f11255c64 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -499,6 +499,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
mflags |= FAN_MARK_EVICTABLE;
+ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ mflags |= FAN_MARK_IGNORE;
return mflags;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b08ce0d821a7..f0e49a406ffa 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1009,10 +1009,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
mask &= ~umask;
spin_lock(&fsn_mark->lock);
oldmask = fsnotify_calc_mask(fsn_mark);
- if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
fsn_mark->mask &= ~mask;
} else {
- fsn_mark->ignored_mask &= ~mask;
+ fsn_mark->ignore_mask &= ~mask;
}
newmask = fsnotify_calc_mask(fsn_mark);
/*
@@ -1021,7 +1021,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
* changes to the mask.
* Destroy mark when only umask bits remain.
*/
- *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
+ *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
spin_unlock(&fsn_mark->lock);
return oldmask & ~newmask;
@@ -1085,15 +1085,24 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
+ unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
bool recalc = false;
/*
+ * When using FAN_MARK_IGNORE for the first time, mark starts using
+ * independent event flags in ignore mask. After that, trying to
+ * update the ignore mask with the old FAN_MARK_IGNORED_MASK API
+ * will result in EEXIST error.
+ */
+ if (ignore == FAN_MARK_IGNORE)
+ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;
+
+ /*
* Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
* the removal of the FS_MODIFY bit in calculated mask if it was set
- * because of an ignored mask that is now going to survive FS_MODIFY.
+ * because of an ignore mask that is now going to survive FS_MODIFY.
*/
- if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
- (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
if (!(fsn_mark->mask & FS_MODIFY))
@@ -1120,10 +1129,10 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
bool recalc;
spin_lock(&fsn_mark->lock);
- if (!(fan_flags & FAN_MARK_IGNORED_MASK))
+ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
fsn_mark->mask |= mask;
else
- fsn_mark->ignored_mask |= mask;
+ fsn_mark->ignore_mask |= mask;
recalc = fsnotify_calc_mask(fsn_mark) &
~fsnotify_conn_mask(fsn_mark->connector);
@@ -1187,6 +1196,37 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
sizeof(struct fanotify_error_event));
}
+static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
+ unsigned int fan_flags)
+{
+ /*
+ * Non evictable mark cannot be downgraded to evictable mark.
+ */
+ if (fan_flags & FAN_MARK_EVICTABLE &&
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
+ return -EEXIST;
+
+ /*
+ * New ignore mask semantics cannot be downgraded to old semantics.
+ */
+ if (fan_flags & FAN_MARK_IGNORED_MASK &&
+ fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ return -EEXIST;
+
+ /*
+ * An ignore mask that survives modify could never be downgraded to not
+ * survive modify. With new FAN_MARK_IGNORE semantics we make that rule
+ * explicit and return an error when trying to update the ignore mask
+ * without the original FAN_MARK_IGNORED_SURV_MODIFY value.
+ */
+ if (fan_flags & FAN_MARK_IGNORE &&
+ !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+ return -EEXIST;
+
+ return 0;
+}
+
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type,
__u32 mask, unsigned int fan_flags,
@@ -1208,19 +1248,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
}
/*
- * Non evictable mark cannot be downgraded to evictable mark.
+ * Check if requested mark flags conflict with an existing mark flags.
*/
- if (fan_flags & FAN_MARK_EVICTABLE &&
- !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
- ret = -EEXIST;
+ ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+ if (ret)
goto out;
- }
/*
* Error events are pre-allocated per group, only if strictly
* needed (i.e. FAN_FS_ERROR was requested).
*/
- if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
+ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
+ (mask & FAN_FS_ERROR)) {
ret = fanotify_group_init_error_pool(group);
if (ret)
goto out;
@@ -1261,10 +1300,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
/*
* If some other task has this inode open for write we should not add
- * an ignored mark, unless that ignored mark is supposed to survive
+ * an ignore mask, unless that ignore mask is supposed to survive
* modification changes anyway.
*/
- if ((flags & FAN_MARK_IGNORED_MASK) &&
+ if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
inode_is_open_for_write(inode))
return 0;
@@ -1520,7 +1559,8 @@ static int fanotify_events_supported(struct fsnotify_group *group,
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
- (mask & FAN_RENAME);
+ (mask & FAN_RENAME) ||
+ (flags & FAN_MARK_IGNORE);
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
@@ -1557,7 +1597,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
__kernel_fsid_t __fsid, *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
- bool ignored = flags & FAN_MARK_IGNORED_MASK;
+ unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
+ unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
u32 umask = 0;
int ret;
@@ -1586,7 +1627,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
}
- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+ switch (mark_cmd) {
case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
if (!mask)
@@ -1606,9 +1647,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ~valid_mask)
return -EINVAL;
- /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
- if (ignored)
+
+ /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
+ if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
+ return -EINVAL;
+
+ /*
+ * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
+ * FAN_MARK_IGNORED_MASK.
+ */
+ if (ignore == FAN_MARK_IGNORED_MASK) {
mask &= ~FANOTIFY_EVENT_FLAGS;
+ umask = FANOTIFY_EVENT_FLAGS;
+ }
f = fdget(fanotify_fd);
if (unlikely(!f.file))
@@ -1672,7 +1723,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
goto fput_and_out;
- if (flags & FAN_MARK_FLUSH) {
+ if (mark_cmd == FAN_MARK_FLUSH) {
ret = 0;
if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
@@ -1688,7 +1739,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
- if (flags & FAN_MARK_ADD) {
+ if (mark_cmd == FAN_MARK_ADD) {
ret = fanotify_events_supported(group, &path, mask, flags);
if (ret)
goto path_put_and_out;
@@ -1712,6 +1763,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
+ ret = mnt ? -EINVAL : -EISDIR;
+ /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
+ if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
+ (mnt || S_ISDIR(inode->i_mode)) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY))
+ goto path_put_and_out;
+
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
@@ -1721,12 +1779,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* events with parent/name info for non-directory.
*/
if ((fid_mode & FAN_REPORT_DIR_FID) &&
- (flags & FAN_MARK_ADD) && !ignored)
+ (flags & FAN_MARK_ADD) && !ignore)
mask |= FAN_EVENT_ON_CHILD;
}
/* create/update an inode mark */
- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+ switch (mark_cmd) {
case FAN_MARK_ADD:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask,
@@ -1804,7 +1862,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
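/*
 * Illustrative sketch, not part of the patch: how a userspace listener might
 * exercise the FAN_MARK_IGNORE semantics introduced above. Per the new check
 * in do_fanotify_mark(), an ignore mark added with FAN_MARK_IGNORE on a
 * directory (or on a mount/filesystem) must also set
 * FAN_MARK_IGNORED_SURV_MODIFY, otherwise the call fails (EISDIR for
 * directories, EINVAL for mount/sb marks). The descriptor setup, helper name
 * and path handling below are hypothetical.
 */
#include <fcntl.h>
#include <sys/fanotify.h>

static int ignore_opens_under_dir(int fan_fd, const char *dir)
{
	/* Dropping FAN_MARK_IGNORED_SURV_MODIFY here would yield EISDIR. */
	return fanotify_mark(fan_fd,
			     FAN_MARK_ADD | FAN_MARK_IGNORE |
			     FAN_MARK_IGNORED_SURV_MODIFY,
			     FAN_OPEN | FAN_ONDIR | FAN_EVENT_ON_CHILD,
			     AT_FDCWD, dir);
}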
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 59fb40abe33d..55081ae3a6ec 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -113,7 +113,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
return;
seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
inode->i_ino, inode->i_sb->s_dev,
- mflags, mark->mask, mark->ignored_mask);
+ mflags, mark->mask, mark->ignore_mask);
show_mark_fhandle(m, inode);
seq_putc(m, '\n');
iput(inode);
@@ -121,12 +121,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
struct mount *mnt = fsnotify_conn_mount(mark->connector);
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
- mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+ mnt->mnt_id, mflags, mark->mask, mark->ignore_mask);
} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
struct super_block *sb = fsnotify_conn_sb(mark->connector);
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
- sb->s_dev, mflags, mark->mask, mark->ignored_mask);
+ sb->s_dev, mflags, mark->mask, mark->ignore_mask);
}
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0b3e74935cb4..7974e91ffe13 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -100,7 +100,7 @@ void fsnotify_sb_delete(struct super_block *sb)
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
- * parent cares. Thus when an event happens on a child it can quickly tell if
+ * parent cares. Thus when an event happens on a child it can quickly tell
* if there is a need to find a parent and send the event to the parent.
*/
void __fsnotify_update_child_dentry_flags(struct inode *inode)
@@ -324,7 +324,8 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
struct fsnotify_group *group = NULL;
__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
__u32 marks_mask = 0;
- __u32 marks_ignored_mask = 0;
+ __u32 marks_ignore_mask = 0;
+ bool is_dir = mask & FS_ISDIR;
struct fsnotify_mark *mark;
int type;
@@ -336,7 +337,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
if (!(mark->flags &
FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
- mark->ignored_mask = 0;
+ mark->ignore_mask = 0;
}
}
@@ -344,14 +345,15 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
group = mark->group;
marks_mask |= mark->mask;
- marks_ignored_mask |= mark->ignored_mask;
+ marks_ignore_mask |=
+ fsnotify_effective_ignore_mask(mark, is_dir, type);
}
- pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
- __func__, group, mask, marks_mask, marks_ignored_mask,
+ pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+ __func__, group, mask, marks_mask, marks_ignore_mask,
data, data_type, dir, cookie);
- if (!(test_mask & marks_mask & ~marks_ignored_mask))
+ if (!(test_mask & marks_mask & ~marks_ignore_mask))
return 0;
if (group->ops->handle_event) {
@@ -423,7 +425,8 @@ static bool fsnotify_iter_select_report_types(
* But is *this mark* watching children?
*/
if (type == FSNOTIFY_ITER_TYPE_PARENT &&
- !(mark->mask & FS_EVENT_ON_CHILD))
+ !(mark->mask & FS_EVENT_ON_CHILD) &&
+ !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
continue;
fsnotify_iter_set_report_type(iter_info, type);
@@ -532,8 +535,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
/*
- * If this is a modify event we may need to clear some ignored masks.
- * In that case, the object with ignored masks will have the FS_MODIFY
+ * If this is a modify event we may need to clear some ignore masks.
+ * In that case, the object with ignore masks will have the FS_MODIFY
* event in its mask.
* Otherwise, return if none of the marks care about this type of event.
*/
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ed42a189faa2..1c4bfdab008d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -136,7 +136,7 @@ static inline u32 inotify_mask_to_arg(__u32 mask)
IN_Q_OVERFLOW);
}
-/* intofiy userspace file descriptor functions */
+/* inotify userspace file descriptor functions */
static __poll_t inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 4de597a83b88..52615e6090e1 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
a = (ATTR_RECORD*)((u8*)ctx->attr +
le32_to_cpu(ctx->attr->length));
for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated))
+ u8 *mrec_end = (u8 *)ctx->mrec +
+ le32_to_cpu(ctx->mrec->bytes_allocated);
+ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
+ a->name_length * sizeof(ntfschar);
+ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
+ name_end > mrec_end)
break;
ctx->attr = a;
if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7497cd592258..9c67edd215d5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1146,7 +1146,7 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (status)
return status;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
status = dquot_initialize(inode);
if (status)
return status;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 337527571461..740b64238312 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -277,7 +277,6 @@ enum ocfs2_mount_options
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
+ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 0b0ae3ebb0cf..da7718cef735 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid ||
- !si->si_slots[preferred].sl_node_num) {
+ if (!si->si_slots[preferred].sl_valid) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid ||
- !si->si_slots[i].sl_node_num) {
+ if (!si->si_slots[i].sl_valid) {
ret = i;
break;
}
@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- if (ocfs2_mount_local(osb))
- /* use slot 0 directly in local mode */
- slot = 0;
- else {
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+	 * need to warn the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ if (slot < 0) {
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
- if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
- "already allocated to this node!\n",
- slot, osb->dev_str);
- }
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
+ "allocated to this node!\n", slot, osb->dev_str);
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f7298816d8d9..438be028935d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -172,7 +172,6 @@ enum {
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
- Opt_nocluster,
Opt_err,
};
@@ -206,7 +205,6 @@ static const match_table_t tokens = {
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
- {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
@@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
- tmp = OCFS2_MOUNT_NOCLUSTER;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
- ret = -EINVAL;
- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
- goto out;
- }
-
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
}
if (ocfs2_userspace_stack(osb) &&
- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
@@ -1137,11 +1127,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
- "without cluster aware mode.\n", osb->dev_str);
-
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
@@ -1452,9 +1437,6 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
- case Opt_nocluster:
- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
- break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1566,9 +1548,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
- if (opts & OCFS2_MOUNT_NOCLUSTER)
- seq_printf(s, ",nocluster");
-
return 0;
}
diff --git a/fs/open.c b/fs/open.c
index 1d57fbde2feb..2790aac66e58 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -663,6 +663,42 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
+/**
+ * setattr_vfsuid - check and set ia_vfsuid attribute
+ * @attr: the new attributes of the inode
+ * @kuid: new inode owner
+ *
+ * Check whether @kuid is valid and if so generate and set vfsuid_t in
+ * ia_vfsuid.
+ *
+ * Return: true if @kuid is valid, false if not.
+ */
+static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
+{
+ if (!uid_valid(kuid))
+ return false;
+ attr->ia_valid |= ATTR_UID;
+ attr->ia_vfsuid = VFSUIDT_INIT(kuid);
+ return true;
+}
+
+/**
+ * setattr_vfsgid - check and set ia_vfsgid attribute
+ * @attr: the new attributes of the inode
+ * @kgid: new inode group owner
+ *
+ * Check whether @kgid is valid and if so generate and set vfsgid_t in
+ * ia_vfsgid.
+ *
+ * Return: true if @kgid is valid, false if not.
+ */
+static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
+{
+ if (!gid_valid(kgid))
+ return false;
+ attr->ia_valid |= ATTR_GID;
+ attr->ia_vfsgid = VFSGIDT_INIT(kgid);
+ return true;
+}
+
int chown_common(const struct path *path, uid_t user, gid_t group)
{
struct user_namespace *mnt_userns, *fs_userns;
@@ -678,28 +714,22 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
mnt_userns = mnt_user_ns(path->mnt);
fs_userns = i_user_ns(inode);
- uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
- gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
- if (user != (uid_t) -1) {
- if (!uid_valid(uid))
- return -EINVAL;
- newattrs.ia_valid |= ATTR_UID;
- newattrs.ia_uid = uid;
- }
- if (group != (gid_t) -1) {
- if (!gid_valid(gid))
- return -EINVAL;
- newattrs.ia_valid |= ATTR_GID;
- newattrs.ia_gid = gid;
- }
+ if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
+ return -EINVAL;
+ if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
+ return -EINVAL;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
inode_lock(inode);
- error = security_path_chown(path, uid, gid);
+ /* Continue to send actual fs values, not the mount values. */
+ error = security_path_chown(
+ path,
+ from_vfsuid(mnt_userns, fs_userns, newattrs.ia_vfsuid),
+ from_vfsgid(mnt_userns, fs_userns, newattrs.ia_vfsgid));
if (!error)
error = notify_change(mnt_userns, path->dentry, &newattrs,
&delegated_inode);
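/*
 * Illustrative sketch, not part of the patch: the value flow that the
 * chown_common() rework above establishes. The caller-provided id is stored
 * as a vfsuid_t through setattr_vfsuid() and only translated back into a
 * filesystem kuid_t (via from_vfsuid()) at the points that need the
 * filesystem view, such as the security_path_chown() call. The helper name
 * and the uid value below are hypothetical.
 */
static void example_vfsuid_flow(struct user_namespace *mnt_userns,
				struct user_namespace *fs_userns)
{
	struct iattr attr = { .ia_valid = ATTR_CTIME };
	kuid_t kuid = make_kuid(current_user_ns(), 1000);

	if (setattr_vfsuid(&attr, kuid)) {
		/* Filesystem view of the new owner, e.g. for LSM or quota. */
		kuid_t fs_uid = from_vfsuid(mnt_userns, fs_userns,
					    attr.ia_vfsuid);
		(void)fs_uid;
	}
}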
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 714ec569d25b..245e2cb62708 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -331,8 +331,8 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
- .ia_uid = stat->uid,
- .ia_gid = stat->gid,
+ .ia_vfsuid = VFSUIDT_INIT(stat->uid),
+ .ia_vfsgid = VFSGIDT_INIT(stat->gid),
};
err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 492eddeb481f..7922b619f6c8 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -454,23 +454,94 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return res;
}
+/*
+ * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
+ * of the POSIX ACLs retrieved from the lower layer to this function to not
+ * alter the POSIX ACLs for the underlying filesystem.
+ */
+static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
+ struct posix_acl *acl)
+{
+ for (unsigned int i = 0; i < acl->a_count; i++) {
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ struct posix_acl_entry *e = &acl->a_entries[i];
+ switch (e->e_tag) {
+ case ACL_USER:
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid);
+ e->e_uid = vfsuid_into_kuid(vfsuid);
+ break;
+ case ACL_GROUP:
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid);
+ e->e_gid = vfsgid_into_kgid(vfsgid);
+ break;
+ }
+ }
+}
+
+/*
+ * When the relevant layer is an idmapped mount we need to take the idmapping
+ * of the layer into account and translate any ACL_{GROUP,USER} values
+ * according to the idmapped mount.
+ *
+ * We cannot alter the ACLs returned from the relevant layer as that would
+ * alter the cached values filesystem wide for the lower filesystem. Instead we
+ * can clone the ACLs and then apply the relevant idmapping of the layer.
+ *
+ * This is obviously only relevant when idmapped layers are used.
+ */
struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
{
struct inode *realinode = ovl_inode_real(inode);
- const struct cred *old_cred;
- struct posix_acl *acl;
+ struct posix_acl *acl, *clone;
+ struct path realpath;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
return NULL;
- if (rcu)
- return get_cached_acl_rcu(realinode, type);
+ /* Careful in RCU walk mode */
+ ovl_i_path_real(inode, &realpath);
+ if (!realpath.dentry) {
+ WARN_ON(!rcu);
+ return ERR_PTR(-ECHILD);
+ }
- old_cred = ovl_override_creds(inode->i_sb);
- acl = get_acl(realinode, type);
- revert_creds(old_cred);
+ if (rcu) {
+ acl = get_cached_acl_rcu(realinode, type);
+ } else {
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ acl = get_acl(realinode, type);
+ revert_creds(old_cred);
+ }
+ /*
+ * If there are no POSIX ACLs, or we encountered an error,
+	 * or the layer isn't idmapped, we don't need to do anything.
+ */
+ if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl))
+ return acl;
- return acl;
+ /*
+ * We only get here if the layer is idmapped. So drop out of RCU path
+ * walk so we can clone the ACLs. There's no need to release the ACLs
+ * since get_cached_acl_rcu() doesn't take a reference on the ACLs.
+ */
+ if (rcu)
+ return ERR_PTR(-ECHILD);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ if (!clone)
+ clone = ERR_PTR(-ENOMEM);
+ else
+ ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone);
+ /*
+ * Since we're not in RCU path walk we always need to release the
+ * original ACLs.
+ */
+ posix_acl_release(acl);
+ return clone;
}
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4f34b7e02eee..6ec815b84d48 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -139,17 +139,7 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs,
struct dentry *upperdentry,
struct iattr *attr)
{
- struct user_namespace *upper_mnt_userns = ovl_upper_mnt_userns(ofs);
- struct user_namespace *fs_userns = i_user_ns(d_inode(upperdentry));
-
- if (attr->ia_valid & ATTR_UID)
- attr->ia_uid = mapped_kuid_user(upper_mnt_userns,
- fs_userns, attr->ia_uid);
- if (attr->ia_valid & ATTR_GID)
- attr->ia_gid = mapped_kgid_user(upper_mnt_userns,
- fs_userns, attr->ia_gid);
-
- return notify_change(upper_mnt_userns, upperdentry, attr, NULL);
+ return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL);
}
static inline int ovl_do_rmdir(struct ovl_fs *ofs,
@@ -259,7 +249,8 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
const char *name, const void *value,
size_t size, int flags)
{
- int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name, value, size, flags);
+ int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
+ (void *)value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1ce5c9698393..e0a2e0468ee7 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1003,9 +1003,6 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
- if (!IS_POSIXACL(inode))
- return -EOPNOTSUPP;
-
return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
@@ -1021,9 +1018,6 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
struct posix_acl *acl = NULL;
int err;
- if (!IS_POSIXACL(inode))
- return -EOPNOTSUPP;
-
/* Check that everything is OK before copy-up */
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
@@ -1966,20 +1960,6 @@ static struct dentry *ovl_get_root(struct super_block *sb,
return root;
}
-static bool ovl_has_idmapped_layers(struct ovl_fs *ofs)
-{
-
- unsigned int i;
- const struct vfsmount *mnt;
-
- for (i = 0; i < ofs->numlayer; i++) {
- mnt = ofs->layers[i].mnt;
- if (mnt && is_idmapped_mnt(mnt))
- return true;
- }
- return false;
-}
-
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { };
@@ -2149,10 +2129,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
- if (ovl_has_idmapped_layers(ofs))
- pr_warn("POSIX ACLs are not yet supported with idmapped layers, mounting without ACL support.\n");
- else
- sb->s_flags |= SB_POSIXACL;
+ sb->s_flags |= SB_POSIXACL;
sb->s_iflags |= SB_I_SKIP_SYNC;
err = -ENOMEM;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 962d32468eb4..1d17d7b13dcd 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(posix_acl_alloc);
/*
* Clone an ACL.
*/
-static struct posix_acl *
+struct posix_acl *
posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
{
struct posix_acl *clone = NULL;
@@ -213,6 +213,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
}
return clone;
}
+EXPORT_SYMBOL_GPL(posix_acl_clone);
/*
* Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
@@ -361,8 +362,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
int found = 0;
- kuid_t uid;
- kgid_t gid;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
want &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -370,30 +371,28 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
switch(pa->e_tag) {
case ACL_USER_OBJ:
/* (May have been checked already) */
- uid = i_uid_into_mnt(mnt_userns, inode);
- if (uid_eq(uid, current_fsuid()))
+ vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto check_perm;
break;
case ACL_USER:
- uid = mapped_kuid_fs(mnt_userns,
- i_user_ns(inode),
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns,
pa->e_uid);
- if (uid_eq(uid, current_fsuid()))
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
break;
case ACL_GROUP_OBJ:
- gid = i_gid_into_mnt(mnt_userns, inode);
- if (in_group_p(gid)) {
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ if (vfsgid_in_group_p(vfsgid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
}
break;
case ACL_GROUP:
- gid = mapped_kgid_fs(mnt_userns,
- i_user_ns(inode),
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns,
pa->e_gid);
- if (in_group_p(gid)) {
+ if (vfsgid_in_group_p(vfsgid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
@@ -699,7 +698,7 @@ int posix_acl_update_mode(struct user_namespace *mnt_userns,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+ if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
@@ -710,46 +709,127 @@ EXPORT_SYMBOL(posix_acl_update_mode);
/*
* Fix up the uids and gids in posix acl extended attributes in place.
*/
-static void posix_acl_fix_xattr_userns(
- struct user_namespace *to, struct user_namespace *from,
- struct user_namespace *mnt_userns,
- void *value, size_t size, bool from_user)
+static int posix_acl_fix_xattr_common(void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ int count;
+
+ if (!header)
+ return -EINVAL;
+ if (size < sizeof(struct posix_acl_xattr_header))
+ return -EINVAL;
+ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
+ return -EINVAL;
+
+ count = posix_acl_xattr_count(size);
+ if (count < 0)
+ return -EINVAL;
+ if (count == 0)
+ return -EINVAL;
+
+ return count;
+}
+
+void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size)
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
int count;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
kuid_t uid;
kgid_t gid;
- if (!value)
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
return;
- if (size < sizeof(struct posix_acl_xattr_header))
+
+ count = posix_acl_fix_xattr_common(value, size);
+ if (count < 0)
return;
- if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
+
+ for (end = entry + count; entry != end; entry++) {
+ switch (le16_to_cpu(entry->e_tag)) {
+ case ACL_USER:
+ uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid);
+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns,
+ vfsuid_into_kuid(vfsuid)));
+ break;
+ case ACL_GROUP:
+ gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid);
+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns,
+ vfsgid_into_kgid(vfsgid)));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ int count;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+ kuid_t uid;
+ kgid_t gid;
+
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
return;
- count = posix_acl_xattr_count(size);
+ count = posix_acl_fix_xattr_common(value, size);
if (count < 0)
return;
- if (count == 0)
+
+ for (end = entry + count; entry != end; entry++) {
+ switch (le16_to_cpu(entry->e_tag)) {
+ case ACL_USER:
+ uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsuid = VFSUIDT_INIT(uid);
+ uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid);
+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
+ break;
+ case ACL_GROUP:
+ gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsgid = VFSGIDT_INIT(gid);
+ gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid);
+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void posix_acl_fix_xattr_userns(
+ struct user_namespace *to, struct user_namespace *from,
+ void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ int count;
+ kuid_t uid;
+ kgid_t gid;
+
+ count = posix_acl_fix_xattr_common(value, size);
+ if (count < 0)
return;
for (end = entry + count; entry != end; entry++) {
switch(le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(from, le32_to_cpu(entry->e_id));
- if (from_user)
- uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
- else
- uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid);
entry->e_id = cpu_to_le32(from_kuid(to, uid));
break;
case ACL_GROUP:
gid = make_kgid(from, le32_to_cpu(entry->e_id));
- if (from_user)
- gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
- else
- gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid);
entry->e_id = cpu_to_le32(from_kgid(to, gid));
break;
default:
@@ -758,34 +838,20 @@ static void posix_acl_fix_xattr_userns(
}
}
-void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+void posix_acl_fix_xattr_from_user(void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
-
- /* Leave ids untouched on non-idmapped mounts. */
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- mnt_userns = &init_user_ns;
- if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ if (user_ns == &init_user_ns)
return;
- posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
- size, true);
+ posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
}
-void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+void posix_acl_fix_xattr_to_user(void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
-
- /* Leave ids untouched on non-idmapped mounts. */
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- mnt_userns = &init_user_ns;
- if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ if (user_ns == &init_user_ns)
return;
- posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
- size, false);
+ posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
}
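/*
 * Illustrative sketch, not part of the patch: the size arithmetic that
 * posix_acl_fix_xattr_common() relies on via posix_acl_xattr_count(). A
 * two-entry ACL xattr occupies sizeof(struct posix_acl_xattr_header) +
 * 2 * sizeof(struct posix_acl_xattr_entry) = 4 + 2 * 8 = 20 bytes, so a
 * 20-byte buffer yields a count of 2; any size that does not divide evenly
 * is rejected. The helper below merely mirrors that computation.
 */
static inline int example_acl_xattr_count(size_t size)
{
	if (size < sizeof(struct posix_acl_xattr_header))
		return -1;
	size -= sizeof(struct posix_acl_xattr_header);
	if (size % sizeof(struct posix_acl_xattr_entry))
		return -1;
	return size / sizeof(struct posix_acl_xattr_entry);
}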
/*
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 09d1307959d0..28966da7834e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2085,7 +2085,8 @@ EXPORT_SYMBOL(__dquot_transfer);
/* Wrapper for transferring ownership of an inode for uid/gid only
* Called from FSXXX_setattr()
*/
-int dquot_transfer(struct inode *inode, struct iattr *iattr)
+int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+ struct iattr *iattr)
{
struct dquot *transfer_to[MAXQUOTAS] = {};
struct dquot *dquot;
@@ -2095,8 +2096,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (!dquot_active(inode))
return 0;
- if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
- dquot = dqget(sb, make_kqid_uid(iattr->ia_uid));
+ if (i_uid_needs_update(mnt_userns, iattr, inode)) {
+ kuid_t kuid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsuid);
+
+ dquot = dqget(sb, make_kqid_uid(kuid));
if (IS_ERR(dquot)) {
if (PTR_ERR(dquot) != -ESRCH) {
ret = PTR_ERR(dquot);
@@ -2106,8 +2110,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
}
transfer_to[USRQUOTA] = dquot;
}
- if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)){
- dquot = dqget(sb, make_kqid_gid(iattr->ia_gid));
+ if (i_gid_needs_update(mnt_userns, iattr, inode)) {
+ kgid_t kgid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsgid);
+
+ dquot = dqget(sb, make_kqid_gid(kgid));
if (IS_ERR(dquot)) {
if (PTR_ERR(dquot) != -ESRCH) {
ret = PTR_ERR(dquot);
diff --git a/fs/read_write.c b/fs/read_write.c
index e0777eefd846..397da0236607 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1263,6 +1263,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
count, fl);
file_end_write(out.file);
} else {
+ if (out.file->f_flags & O_NONBLOCK)
+ fl |= SPLICE_F_NONBLOCK;
+
retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0cffe054b78e..0df48d176732 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -290,7 +290,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
struct buffer_head *bh;
struct item_head *ih, tmp_ih;
b_blocknr_t blocknr;
- char *p = NULL;
+ char *p;
int chars;
int ret;
int result;
@@ -305,8 +305,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
result = search_for_position_by_key(inode->i_sb, &key, &path);
if (result != POSITION_FOUND) {
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
if (result == IO_ERROR)
return -EIO;
/*
@@ -352,8 +350,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
}
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
return ret;
}
/* requested data are in direct item(s) */
@@ -363,8 +359,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* when it is stored in direct item(s)
*/
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
return -ENOENT;
}
@@ -396,9 +390,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* sure we need to. But, this means the item might move if
* kmap schedules
*/
- if (!p)
- p = (char *)kmap(bh_result->b_page);
-
+ p = (char *)kmap(bh_result->b_page);
p += offset;
memset(p, 0, inode->i_sb->s_blocksize);
do {
@@ -3284,7 +3276,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
@@ -3367,7 +3359,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
reiserfs_write_unlock(inode->i_sb);
if (error)
goto out;
- error = dquot_transfer(inode, attr);
+ error = dquot_transfer(mnt_userns, inode, attr);
reiserfs_write_lock(inode->i_sb);
if (error) {
journal_end(&th);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e943370107d0..de86f5b2859f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -192,17 +192,19 @@ static inline void msg_init(struct uffd_msg *msg)
}
static inline struct uffd_msg userfault_msg(unsigned long address,
+ unsigned long real_address,
unsigned int flags,
unsigned long reason,
unsigned int features)
{
struct uffd_msg msg;
+
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
- if (!(features & UFFD_FEATURE_EXACT_ADDRESS))
- address &= PAGE_MASK;
- msg.arg.pagefault.address = address;
+ msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ?
+ real_address : address;
+
/*
* These flags indicate why the userfault occurred:
* - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
@@ -488,8 +490,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason,
- ctx->features);
+ uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
+ reason, ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
diff --git a/fs/xattr.c b/fs/xattr.c
index e8dd03e4561e..a1f4998bc6be 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -282,9 +282,15 @@ out:
}
EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
+static inline bool is_posix_acl_xattr(const char *name)
+{
+ return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0);
+}
+
int
vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
- const char *name, const void *value, size_t size, int flags)
+ const char *name, void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -292,12 +298,16 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(mnt_userns, dentry, &value, size);
+ error = cap_convert_nscap(mnt_userns, dentry,
+ (const void **)&value, size);
if (error < 0)
return error;
size = error;
}
+ if (size && is_posix_acl_xattr(name))
+ posix_acl_setxattr_idmapped_mnt(mnt_userns, inode, value, size);
+
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
@@ -431,7 +441,10 @@ vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return ret;
}
nolsm:
- return __vfs_getxattr(dentry, inode, name, value, size);
+ error = __vfs_getxattr(dentry, inode, name, value, size);
+ if (error > 0 && is_posix_acl_xattr(name))
+ posix_acl_getxattr_idmapped_mnt(mnt_userns, inode, value, size);
+ return error;
}
EXPORT_SYMBOL_GPL(vfs_getxattr);
@@ -577,8 +590,7 @@ static void setxattr_convert(struct user_namespace *mnt_userns,
if (ctx->size &&
((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
- posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
- ctx->kvalue, ctx->size);
+ posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size);
}
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
@@ -695,8 +707,7 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
- ctx->kvalue, error);
+ posix_acl_fix_xattr_to_user(ctx->kvalue, error);
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 29f5b8b8aca6..a7402f6ea510 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -667,13 +667,15 @@ xfs_setattr_nonsize(
uint qflags = 0;
if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = iattr->ia_uid;
+ uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsuid);
qflags |= XFS_QMOPT_UQUOTA;
} else {
uid = inode->i_uid;
}
if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = iattr->ia_gid;
+ gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsgid);
qflags |= XFS_QMOPT_GQUOTA;
} else {
gid = inode->i_gid;
@@ -704,13 +706,13 @@ xfs_setattr_nonsize(
* didn't have the inode locked, inode's dquot(s) would have changed
* also.
*/
- if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp) &&
- !uid_eq(inode->i_uid, iattr->ia_uid)) {
+ if (XFS_IS_UQUOTA_ON(mp) &&
+ i_uid_needs_update(mnt_userns, iattr, inode)) {
ASSERT(udqp);
old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
}
- if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp) &&
- !gid_eq(inode->i_gid, iattr->ia_gid)) {
+ if (XFS_IS_GQUOTA_ON(mp) &&
+ i_gid_needs_update(mnt_userns, iattr, inode)) {
ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
ASSERT(gdqp);
old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 053299758deb..f5d8338967cb 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -616,7 +616,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
!uid_eq(iattr->ia_uid, inode->i_uid)) ||
((iattr->ia_valid & ATTR_GID) &&
!gid_eq(iattr->ia_gid, inode->i_gid))) {
- ret = dquot_transfer(inode, iattr);
+ ret = dquot_transfer(mnt_userns, inode, iattr);
if (ret)
return ret;
}
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 7ce93aaf69f8..98954dda5734 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
}
#endif
-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
extern int devmem_is_allowed(unsigned long pfn);
-#endif
#endif /* __KERNEL__ */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index ff3e82553a76..492dce43236e 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -158,9 +158,24 @@
* Useful if your architecture doesn't use IPIs for remote TLB invalidates
* and therefore doesn't naturally serialize with software page-table walkers.
*
+ * MMU_GATHER_NO_FLUSH_CACHE
+ *
+ * Indicates the architecture has flush_cache_range() but it need *NOT* be called
+ * before unmapping a VMA.
+ *
+ * NOTE: strictly speaking we shouldn't have this knob and should instead
+ * rely on flush_cache_range() being a NOP, except Sparc64 seems to be
+ * different here.
+ *
+ * MMU_GATHER_MERGE_VMAS
+ *
+ * Indicates the architecture wants to merge ranges over VMAs; typically when
+ * multiple range invalidates are more expensive than a full invalidate.
+ *
* MMU_GATHER_NO_RANGE
*
- * Use this if your architecture lacks an efficient flush_tlb_range().
+ * Use this if your architecture lacks an efficient flush_tlb_range(). This
+ * option implies MMU_GATHER_MERGE_VMAS above.
*
* MMU_GATHER_NO_GATHER
*
@@ -288,6 +303,7 @@ struct mmu_gather {
*/
unsigned int vma_exec : 1;
unsigned int vma_huge : 1;
+ unsigned int vma_pfn : 1;
unsigned int batch_count;
@@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
#ifdef CONFIG_MMU_GATHER_NO_RANGE
-#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
-#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#if defined(tlb_flush)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
#endif
/*
@@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm(tlb->mm);
}
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#define tlb_end_vma tlb_end_vma
-static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
#else /* CONFIG_MMU_GATHER_NO_RANGE */
#ifndef tlb_flush
-
-#if defined(tlb_start_vma) || defined(tlb_end_vma)
-#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
-#endif
-
/*
* When an architecture does not provide its own tlb_flush() implementation
* but does have a reasonably efficient flush_vma_range() implementation
@@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_range(&vma, tlb->start, tlb->end);
}
}
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
@@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
*/
tlb->vma_huge = is_vm_hugetlb_page(vma);
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+ tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
}
-#else
-
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#endif
-
-#endif /* CONFIG_MMU_GATHER_NO_RANGE */
-
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
/*
@@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
* case where we're doing a full MM flush. When we're doing a munmap,
* the vmas are adjusted to only cover the region to be torn down.
*/
-#ifndef tlb_start_vma
static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
#endif
+}
-#ifndef tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
/*
- * Do a TLB flush and reset the range at VMA boundaries; this avoids
- * the ranges growing with the unused space between consecutive VMAs,
- * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
- * this.
+ * VM_PFNMAP is more fragile because the core mm will not track the
+ * page mapcount -- there might not be page-frames for these PFNs after
+ * all. Force flush TLBs for such ranges to avoid munmap() vs
+ * unmap_mapping_range() races.
*/
- tlb_flush_mmu_tlbonly(tlb);
+ if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
-#endif
/*
* tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0fca8f38bee4..addb135eeea6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -28,7 +28,7 @@
#include <linux/dma-fence.h>
#include <linux/completion.h>
#include <linux/xarray.h>
-#include <linux/irq_work.h>
+#include <linux/workqueue.h>
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
@@ -295,7 +295,7 @@ struct drm_sched_job {
*/
union {
struct dma_fence_cb finish_cb;
- struct irq_work work;
+ struct work_struct work;
};
uint64_t id;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 19f0dbfdd7fe..b66c5f389159 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -130,7 +130,6 @@ enum cpuhp_state {
CPUHP_ZCOMP_PREPARE,
CPUHP_TIMERS_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
- CPUHP_LOONGARCH_SOC_PREPARE,
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 4c374be70247..aa63e0b3c0a2 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -21,7 +21,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
void *xattr_value,
size_t xattr_value_len,
struct integrity_iint_cache *iint);
-extern int evm_inode_setattr(struct dentry *dentry, struct iattr *attr);
+extern int evm_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
extern int evm_inode_setxattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
@@ -68,7 +69,8 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
}
#endif
-static inline int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
+static inline int evm_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
return 0;
}
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index e517dbcf74ed..8ad743def6f3 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -59,15 +59,19 @@
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
+#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
+ FAN_MARK_FLUSH)
+
+#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
+ FAN_MARK_IGNORE)
+
#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \
- FAN_MARK_ADD | \
- FAN_MARK_REMOVE | \
+ FANOTIFY_MARK_CMD_BITS | \
+ FANOTIFY_MARK_IGNORE_BITS | \
FAN_MARK_DONT_FOLLOW | \
FAN_MARK_ONLYDIR | \
- FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORED_SURV_MODIFY | \
- FAN_MARK_EVICTABLE | \
- FAN_MARK_FLUSH)
+ FAN_MARK_EVICTABLE)
/*
* Events that can be reported with data type FSNOTIFY_EVENT_PATH.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ad5e3520fae..ec2e35886779 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -221,8 +221,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
struct iattr {
unsigned int ia_valid;
umode_t ia_mode;
- kuid_t ia_uid;
- kgid_t ia_gid;
+ /*
+ * The two anonymous unions wrap structures with the same member.
+ *
+ * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
+ * are a dedicated type requiring the filesystem to use the dedicated
+	 * helpers. Other filesystems can continue to use ia_{g,u}id until they
+ * have been ported.
+ *
+ * They always contain the same value. In other words FS_ALLOW_IDMAP
+ * pass down the same value on idmapped mounts as they would on regular
+ * mounts.
+ */
+ union {
+ kuid_t ia_uid;
+ vfsuid_t ia_vfsuid;
+ };
+ union {
+ kgid_t ia_gid;
+ vfsgid_t ia_vfsgid;
+ };
loff_t ia_size;
struct timespec64 ia_atime;
struct timespec64 ia_mtime;
@@ -1600,13 +1618,68 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
*
+ * Note, this will eventually be removed completely in favor of the type-safe
+ * i_uid_into_vfsuid().
+ *
* Return: the inode's i_uid mapped down according to @mnt_userns.
* If the inode's i_uid has no mapping INVALID_UID is returned.
*/
static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
+ return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid));
+}
+
+/**
+ * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to map
+ *
+ * Return: the inode's i_uid mapped down according to @mnt_userns.
+ * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
+ */
+static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+{
+ return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid);
+}
+
+/**
+ * i_uid_needs_update - check whether inode's i_uid needs to be updated
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Check whether @inode's i_uid field needs to be updated, taking idmapped
+ * mounts into account if the filesystem supports it.
+ *
+ * Return: true if @inode's i_uid field needs to be updated, false if not.
+ */
+static inline bool i_uid_needs_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ const struct inode *inode)
+{
+ return ((attr->ia_valid & ATTR_UID) &&
+ !vfsuid_eq(attr->ia_vfsuid,
+ i_uid_into_vfsuid(mnt_userns, inode)));
+}
+
+/**
+ * i_uid_update - update @inode's i_uid field
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Safely update @inode's i_uid field translating the vfsuid of any idmapped
+ * mount into the filesystem kuid.
+ */
+static inline void i_uid_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ struct inode *inode)
+{
+ if (attr->ia_valid & ATTR_UID)
+ inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ attr->ia_vfsuid);
}
/**
@@ -1614,13 +1687,68 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
*
+ * Note, this will eventually be removed completely in favor of the type-safe
+ * i_gid_into_vfsgid().
+ *
* Return: the inode's i_gid mapped down according to @mnt_userns.
* If the inode's i_gid has no mapping INVALID_GID is returned.
*/
static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
+ return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid));
+}
+
+/**
+ * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to map
+ *
+ * Return: the inode's i_gid mapped down according to @mnt_userns.
+ * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
+ */
+static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+{
+ return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid);
+}
+
+/**
+ * i_gid_needs_update - check whether inode's i_gid needs to be updated
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Check whether @inode's i_gid field needs to be updated, taking idmapped
+ * mounts into account if the filesystem supports it.
+ *
+ * Return: true if @inode's i_gid field needs to be updated, false if not.
+ */
+static inline bool i_gid_needs_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ const struct inode *inode)
+{
+ return ((attr->ia_valid & ATTR_GID) &&
+ !vfsgid_eq(attr->ia_vfsgid,
+ i_gid_into_vfsgid(mnt_userns, inode)));
+}
+
+/**
+ * i_gid_update - update @inode's i_gid field
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Safely update @inode's i_gid field translating the vfsgid of any idmapped
+ * mount into the filesystem kgid.
+ */
+static inline void i_gid_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ struct inode *inode)
+{
+ if (attr->ia_valid & ATTR_GID)
+ inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ attr->ia_vfsgid);
}
/**
@@ -2195,8 +2323,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
struct inode *inode)
{
- return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
- !gid_valid(i_gid_into_mnt(mnt_userns, inode));
+ return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
+ !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode));
}
static inline int iocb_flags(struct file *file);
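/*
 * Illustrative sketch, not part of the patch: how a ported filesystem's
 * ->setattr() might use the helpers above instead of reading ia_uid/ia_gid
 * directly. The function name and the surrounding steps are hypothetical;
 * only the i_{u,g}id_needs_update() and i_{u,g}id_update() calls reflect the
 * interfaces introduced here.
 */
static int example_setattr(struct user_namespace *mnt_userns,
			   struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);

	/* Did the caller request an ownership change that is a real change? */
	if (i_uid_needs_update(mnt_userns, attr, inode) ||
	    i_gid_needs_update(mnt_userns, attr, inode)) {
		/* e.g. transfer quota before switching the owner */
	}

	/* Translate ia_vfsuid/ia_vfsgid into filesystem-wide k{u,g}ids. */
	i_uid_update(mnt_userns, attr, inode);
	i_gid_update(mnt_userns, attr, inode);

	mark_inode_dirty(inode);
	return 0;
}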
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9560734759fa..d7d96c806bff 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -518,8 +518,8 @@ struct fsnotify_mark {
struct hlist_node obj_list;
/* Head of list of marks for an object [mark ref] */
struct fsnotify_mark_connector *connector;
- /* Events types to ignore [mark->lock, group->mark_mutex] */
- __u32 ignored_mask;
+ /* Events types and flags to ignore [mark->lock, group->mark_mutex] */
+ __u32 ignore_mask;
/* General fsnotify mark flags */
#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
@@ -529,6 +529,7 @@ struct fsnotify_mark {
/* fanotify mark flags */
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
+#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400
unsigned int flags; /* flags [mark->lock] */
};
@@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
/* functions used to manipulate the marks attached to inodes */
-/* Get mask for calculating object interest taking ignored mask into account */
+/*
+ * Canonical "ignore mask" including event flags.
+ *
+ * Note the subtle semantic difference from the legacy ->ignored_mask.
+ * ->ignored_mask traditionally only meant which events should be ignored,
+ * while ->ignore_mask also includes flags regarding the type of objects on
+ * which events should be ignored.
+ */
+static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
+{
+ __u32 ignore_mask = mark->ignore_mask;
+
+ /* The event flags in ignore mask take effect */
+ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ return ignore_mask;
+
+ /*
+ * Legacy behavior:
+ * - Always ignore events on dir
+ * - Ignore events on child if parent is watching children
+ */
+ ignore_mask |= FS_ISDIR;
+ ignore_mask &= ~FS_EVENT_ON_CHILD;
+ ignore_mask |= mark->mask & FS_EVENT_ON_CHILD;
+
+ return ignore_mask;
+}
+
+/* Legacy ignored_mask - only event types to ignore */
+static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
+{
+ return mark->ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/*
+ * Check if mask (or ignore mask) should be applied depending on whether the
+ * victim is a directory and whether it is reported to a watching parent.
+ */
+static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
+ int iter_type)
+{
+ /* Should mask be applied to a directory? */
+ if (is_dir && !(mask & FS_ISDIR))
+ return false;
+
+ /* Should mask be applied to a child? */
+ if (iter_type == FSNOTIFY_ITER_TYPE_PARENT &&
+ !(mask & FS_EVENT_ON_CHILD))
+ return false;
+
+ return true;
+}
+
+/*
+ * Effective ignore mask taking into account if event victim is a
+ * directory and whether it is reported to a watching parent.
+ */
+static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
+ bool is_dir, int iter_type)
+{
+ __u32 ignore_mask = fsnotify_ignored_events(mark);
+
+ if (!ignore_mask)
+ return 0;
+
+ /* For non-dir and non-child, no need to consult the event flags */
+ if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT)
+ return ignore_mask;
+
+ ignore_mask = fsnotify_ignore_mask(mark);
+ if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type))
+ return 0;
+
+ return ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/* Get mask for calculating object interest taking ignore mask into account */
static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
{
__u32 mask = mark->mask;
- if (!mark->ignored_mask)
+ if (!fsnotify_ignored_events(mark))
return mask;
- /* Interest in FS_MODIFY may be needed for clearing ignored mask */
+ /* Interest in FS_MODIFY may be needed for clearing ignore mask */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mask |= FS_MODIFY;
@@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
* If mark is interested in ignoring events on children, the object must
* show interest in those events for fsnotify_parent() to notice it.
*/
- return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
+ return mask | mark->ignore_mask;
}
/* Get mask of events for a list of marks */
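/*
 * Illustrative sketch, not part of this patch: how a caller is expected to
 * combine the helpers above.  example_remaining_events() is hypothetical; it
 * strips the event bits one mark wants ignored for this victim, and the loop
 * over all marks done by the fsnotify core is omitted.
 */
static inline __u32 example_remaining_events(struct fsnotify_mark *mark,
                                             __u32 event_mask, bool is_dir,
                                             int iter_type)
{
        return event_mask &
               ~fsnotify_effective_ignore_mask(mark, is_dir, iter_type);
}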
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..7898e29bcfb5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page)
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page(struct page *page);
-static inline bool put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs);
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
- return __put_devmap_managed_page(page);
+ return __put_devmap_managed_page_refs(page, refs);
}
-
#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
return false;
}
#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+static inline bool put_devmap_managed_page(struct page *page)
+{
+ return put_devmap_managed_page_refs(page, 1);
+}
+
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index ee5a217de2a8..f6e5369d2928 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -13,6 +13,129 @@ struct user_namespace;
*/
extern struct user_namespace init_user_ns;
+typedef struct {
+ uid_t val;
+} vfsuid_t;
+
+typedef struct {
+ gid_t val;
+} vfsgid_t;
+
+static_assert(sizeof(vfsuid_t) == sizeof(kuid_t));
+static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
+static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
+static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
+
+#ifdef CONFIG_MULTIUSER
+static inline uid_t __vfsuid_val(vfsuid_t uid)
+{
+ return uid.val;
+}
+
+static inline gid_t __vfsgid_val(vfsgid_t gid)
+{
+ return gid.val;
+}
+#else
+static inline uid_t __vfsuid_val(vfsuid_t uid)
+{
+ return 0;
+}
+
+static inline gid_t __vfsgid_val(vfsgid_t gid)
+{
+ return 0;
+}
+#endif
+
+static inline bool vfsuid_valid(vfsuid_t uid)
+{
+ return __vfsuid_val(uid) != (uid_t)-1;
+}
+
+static inline bool vfsgid_valid(vfsgid_t gid)
+{
+ return __vfsgid_val(gid) != (gid_t)-1;
+}
+
+static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right)
+{
+ return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right);
+}
+
+static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right)
+{
+ return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right);
+}
+
+/**
+ * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value
+ * @vfsuid: the vfsuid to compare
+ * @kuid: the kuid to compare
+ *
+ * Check whether @vfsuid and @kuid have the same values.
+ *
+ * Return: true if @vfsuid and @kuid have the same value, false if not.
+ * Comparison between two invalid uids returns false.
+ */
+static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+ return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid);
+}
+
+/**
+ * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value
+ * @vfsgid: the vfsgid to compare
+ * @kgid: the kgid to compare
+ *
+ * Check whether @vfsgid and @kgid have the same values.
+ *
+ * Return: true if @vfsgid and @kgid have the same value, false if not.
+ * Comparison between two invalid gids returns false.
+ */
+static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+ return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid);
+}
+
+/*
+ * vfs{g,u}ids are created from k{g,u}ids.
+ * We don't allow them to be created from regular {u,g}id.
+ */
+#define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) }
+#define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) }
+
+#define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID)
+#define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID)
+
+/*
+ * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare
+ * whether the mapped value is identical to value of a k{g,u}id.
+ */
+#define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) }
+#define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) }
+
+#ifdef CONFIG_MULTIUSER
+/**
+ * vfsgid_in_group_p() - check whether a vfsgid matches the caller's groups
+ * @vfsgid: the mnt gid to match
+ *
+ * This function can be used to determine whether @vfsgid matches any of the
+ * caller's groups.
+ *
+ * Return: 1 if @vfsgid matches the caller's groups, 0 if not.
+ */
+static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return in_group_p(AS_KGIDT(vfsgid));
+}
+#else
+static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return 1;
+}
+#endif
+
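/*
 * Illustrative sketch, not part of this patch: the intended construction and
 * comparison patterns for the new types.  Both helpers are hypothetical
 * examples; vfsuids are only ever built from kuids, never from raw uid_t.
 */
static inline vfsuid_t example_root_vfsuid(void)
{
        return VFSUIDT_INIT(GLOBAL_ROOT_UID);
}

static inline bool example_vfsuid_is_caller(vfsuid_t vfsuid)
{
        /* Compare a mount-mapped uid against the caller's fsuid. */
        return vfsuid_eq_kuid(vfsuid, current_fsuid());
}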
/**
* initial_idmapping - check whether this is the initial mapping
* @ns: idmapping to check
@@ -48,7 +171,7 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
}
/**
- * mapped_kuid_fs - map a filesystem kuid into a mnt_userns
+ * make_vfsuid - map a filesystem kuid into a mnt_userns
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kuid : kuid to be mapped
@@ -67,25 +190,33 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
* If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
* returned.
*/
-static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kuid_t kuid)
+
+static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
{
uid_t uid;
if (no_idmapping(mnt_userns, fs_userns))
- return kuid;
+ return VFSUIDT_INIT(kuid);
if (initial_idmapping(fs_userns))
uid = __kuid_val(kuid);
else
uid = from_kuid(fs_userns, kuid);
if (uid == (uid_t)-1)
- return INVALID_UID;
- return make_kuid(mnt_userns, uid);
+ return INVALID_VFSUID;
+ return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
+}
+
+static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
+{
+ return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid));
}
/**
- * mapped_kgid_fs - map a filesystem kgid into a mnt_userns
+ * make_vfsgid - map a filesystem kgid into a mnt_userns
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kgid : kgid to be mapped
@@ -104,21 +235,56 @@ static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
* If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
* returned.
*/
-static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kgid_t kgid)
+
+static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
{
gid_t gid;
if (no_idmapping(mnt_userns, fs_userns))
- return kgid;
+ return VFSGIDT_INIT(kgid);
if (initial_idmapping(fs_userns))
gid = __kgid_val(kgid);
else
gid = from_kgid(fs_userns, kgid);
if (gid == (gid_t)-1)
- return INVALID_GID;
- return make_kgid(mnt_userns, gid);
+ return INVALID_VFSGID;
+ return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
+}
+
+static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
+{
+ return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid));
+}
+
+/**
+ * from_vfsuid - map a vfsuid into the filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsuid : vfsuid to be mapped
+ *
+ * Map @vfsuid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsuid to inode->i_uid.
+ *
+ * Return: @vfsuid mapped into the filesystem idmapping
+ */
+static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsuid_t vfsuid)
+{
+ uid_t uid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return AS_KUIDT(vfsuid);
+ uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
+ if (uid == (uid_t)-1)
+ return INVALID_UID;
+ if (initial_idmapping(fs_userns))
+ return KUIDT_INIT(uid);
+ return make_kuid(fs_userns, uid);
}
/**
@@ -145,16 +311,66 @@ static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns,
kuid_t kuid)
{
- uid_t uid;
+ return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid));
+}
+
+/**
+ * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsuid: vfsuid to be mapped
+ *
+ * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this
+ * function to check whether the filesystem idmapping has a mapping for
+ * @vfsuid.
+ *
+ * Return: true if @vfsuid has a mapping in the filesystem, false if not.
+ */
+static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsuid_t vfsuid)
+{
+ return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid));
+}
+
+/**
+ * vfsuid_into_kuid - convert vfsuid into kuid
+ * @vfsuid: the vfsuid to convert
+ *
+ * This can be used when a vfsuid is committed as a kuid.
+ *
+ * Return: a kuid with the value of @vfsuid
+ */
+static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid)
+{
+ return AS_KUIDT(vfsuid);
+}
+
+/**
+ * from_vfsgid - map a vfsgid into the filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsgid : vfsgid to be mapped
+ *
+ * Map @vfsgid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsgid to inode->i_gid.
+ *
+ * Return: @vfsgid mapped into the filesystem idmapping
+ */
+static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsgid_t vfsgid)
+{
+ gid_t gid;
if (no_idmapping(mnt_userns, fs_userns))
- return kuid;
- uid = from_kuid(mnt_userns, kuid);
- if (uid == (uid_t)-1)
- return INVALID_UID;
+ return AS_KGIDT(vfsgid);
+ gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
+ if (gid == (gid_t)-1)
+ return INVALID_GID;
if (initial_idmapping(fs_userns))
- return KUIDT_INIT(uid);
- return make_kuid(fs_userns, uid);
+ return KGIDT_INIT(gid);
+ return make_kgid(fs_userns, gid);
}
/**
@@ -181,16 +397,39 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns,
kgid_t kgid)
{
- gid_t gid;
+ return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid));
+}
- if (no_idmapping(mnt_userns, fs_userns))
- return kgid;
- gid = from_kgid(mnt_userns, kgid);
- if (gid == (gid_t)-1)
- return INVALID_GID;
- if (initial_idmapping(fs_userns))
- return KGIDT_INIT(gid);
- return make_kgid(fs_userns, gid);
+/**
+ * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsgid: vfsgid to be mapped
+ *
+ * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this
+ * function to check whether the filesystem idmapping has a mapping for
+ * @vfsgid.
+ *
+ * Return: true if @vfsgid has a mapping in the filesystem, false if not.
+ */
+static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsgid_t vfsgid)
+{
+ return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid));
+}
+
+/**
+ * vfsgid_into_kgid - convert vfsgid into kgid
+ * @vfsgid: the vfsgid to convert
+ *
+ * This can be used when a vfsgid is committed as a kgid.
+ *
+ * Return: a kgid with the value of @vfsgid
+ */
+static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
+{
+ return AS_KGIDT(vfsgid);
}
/**
@@ -209,7 +448,8 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns)
{
- return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid());
+ return from_vfsuid(mnt_userns, fs_userns,
+ VFSUIDT_INIT(current_fsuid()));
}
/**
@@ -228,7 +468,8 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns)
{
- return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid());
+ return from_vfsgid(mnt_userns, fs_userns,
+ VFSGIDT_INIT(current_fsgid()));
}
#endif /* _LINUX_MNT_IDMAPPING_H */
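/*
 * Illustrative sketch, not part of this patch: the expected round trip
 * through an idmapped mount.  make_vfsuid() maps an inode's kuid into the
 * mount's view, from_vfsuid() maps the caller-provided value back into the
 * filesystem's kuid before it is committed.  example_vfsuid_round_trips() is
 * a hypothetical helper for illustration only.
 */
static inline bool example_vfsuid_round_trips(struct user_namespace *mnt_userns,
                                              struct user_namespace *fs_userns,
                                              kuid_t kuid)
{
        vfsuid_t vfsuid = make_vfsuid(mnt_userns, fs_userns, kuid);

        if (!vfsuid_valid(vfsuid))
                return false;
        return uid_eq(from_vfsuid(mnt_userns, fs_userns, vfsuid), kuid);
}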
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0178823ce8c2..7fa460ccf7fa 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -556,10 +556,13 @@
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727
#define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index b65c877d92b8..7d1e604c1325 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -73,6 +73,7 @@ extern int set_posix_acl(struct user_namespace *, struct inode *, int,
struct posix_acl *);
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags);
#ifdef CONFIG_FS_POSIX_ACL
int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t);
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 1766e1de6956..b6bd3eac2bcc 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -33,21 +33,31 @@ posix_acl_xattr_count(size_t size)
}
#ifdef CONFIG_FS_POSIX_ACL
-void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size);
-void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size);
+void posix_acl_fix_xattr_from_user(void *value, size_t size);
+void posix_acl_fix_xattr_to_user(void *value, size_t size);
+void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size);
+void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size);
#else
-static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+static inline void posix_acl_fix_xattr_from_user(void *value, size_t size)
{
}
-static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+static inline void posix_acl_fix_xattr_to_user(void *value, size_t size)
+{
+}
+static inline void
+posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode, void *value,
+ size_t size)
+{
+}
+static inline void
+posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode, void *value,
+ size_t size)
{
}
#endif
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a0f6668924d3..0d8625d71733 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,11 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
}
/* i_mutex must being held */
-static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
+static inline bool is_quota_modification(struct user_namespace *mnt_userns,
+ struct inode *inode, struct iattr *ia)
{
- return (ia->ia_valid & ATTR_SIZE) ||
- (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) ||
- (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid));
+ return ((ia->ia_valid & ATTR_SIZE) ||
+ i_uid_needs_update(mnt_userns, ia, inode) ||
+ i_gid_needs_update(mnt_userns, ia, inode));
}
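/*
 * Illustrative sketch, not part of this patch: the setattr-side pairing of
 * the two quota helpers, assuming CONFIG_QUOTA.  example_setattr_quota() is
 * hypothetical and omits locking and the rest of the setattr work.
 */
static int example_setattr_quota(struct user_namespace *mnt_userns,
                                 struct inode *inode, struct iattr *ia)
{
        if (is_quota_modification(mnt_userns, inode, ia)) {
                int err = dquot_initialize(inode);

                if (err)
                        return err;
        }
        if (i_uid_needs_update(mnt_userns, ia, inode) ||
            i_gid_needs_update(mnt_userns, ia, inode))
                return dquot_transfer(mnt_userns, inode, ia);
        return 0;
}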
#if defined(CONFIG_QUOTA)
@@ -115,7 +116,8 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id,
struct qc_dqblk *di);
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
-int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+ struct iattr *iattr);
static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
{
@@ -234,7 +236,8 @@ static inline void dquot_free_inode(struct inode *inode)
{
}
-static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
+static inline int dquot_transfer(struct user_namespace *mnt_userns,
+ struct inode *inode, struct iattr *iattr)
{
return 0;
}
diff --git a/include/linux/security.h b/include/linux/security.h
index 7fc4e9f49f54..4d0baf30266e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -353,7 +353,8 @@ int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
bool rcu);
int security_inode_permission(struct inode *inode, int mask);
-int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
+int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(const struct path *path);
int security_inode_setxattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
@@ -848,8 +849,9 @@ static inline int security_inode_permission(struct inode *inode, int mask)
return 0;
}
-static inline int security_inode_setattr(struct dentry *dentry,
- struct iattr *attr)
+static inline int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ struct iattr *attr)
{
return 0;
}
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 29917850f079..8df475db88c0 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -260,6 +260,7 @@ struct plat_stmmacenet_data {
bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
+ bool int_snapshot_en;
bool ext_snapshot_en;
bool multi_msi_en;
int msi_mac_vec;
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 4c379d23ec6e..979a9d3e5bfb 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
const char *, const void *, size_t, int,
struct inode **);
int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
- const void *, size_t, int);
+ void *, size_t, int);
int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
const char *, struct inode **);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f7506f08e505..c04f359655b8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
{
const struct inet6_dev *idev = __in6_dev_get(dev);
+ if (unlikely(!idev))
+ return true;
+
return !!idev->cnf.ignore_routes_with_linkdown;
}
diff --git a/include/net/amt.h b/include/net/amt.h
index 0e40c3d64fcf..08fc30cf2f34 100644
--- a/include/net/amt.h
+++ b/include/net/amt.h
@@ -78,6 +78,15 @@ enum amt_status {
#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
+/* Gateway events only */
+enum amt_event {
+ AMT_EVENT_NONE,
+ AMT_EVENT_RECEIVE,
+ AMT_EVENT_SEND_DISCOVERY,
+ AMT_EVENT_SEND_REQUEST,
+ __AMT_EVENT_MAX,
+};
+
struct amt_header {
#if defined(__LITTLE_ENDIAN_BITFIELD)
u8 type:4,
@@ -292,6 +301,12 @@ struct amt_group_node {
struct hlist_head sources[];
};
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+ enum amt_event event;
+ struct sk_buff *skb;
+};
+
struct amt_dev {
struct net_device *dev;
struct net_device *stream_dev;
@@ -308,6 +323,7 @@ struct amt_dev {
struct delayed_work req_wq;
/* Protected by RTNL */
struct delayed_work secret_wq;
+ struct work_struct event_wq;
/* AMT status */
enum amt_status status;
/* Generated key */
@@ -345,6 +361,10 @@ struct amt_dev {
/* Used only in gateway mode */
u64 mac:48,
reserved:16;
+ /* AMT gateway side message handler queue */
+ struct amt_events events[AMT_MAX_EVENTS];
+ u8 event_idx;
+ u8 nr_events;
};
#define AMT_TOS 0xc0
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3c4f550e5a8b..2f766e3437ce 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -847,6 +847,7 @@ enum {
};
void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
void l2cap_chan_put(struct l2cap_chan *c);
static inline void l2cap_chan_lock(struct l2cap_chan *chan)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 85cd695e7fd1..ee88f0f1350f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -321,7 +321,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 3
+#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
@@ -338,14 +338,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ack.pingpong < U8_MAX)
- icsk->icsk_ack.pingpong++;
-}
-
static inline bool inet_csk_has_ulp(struct sock *sk)
{
return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ebfa3df6f8dc..fd6b510d114b 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index daead5fb389a..6395f6b9a5d2 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
@@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
@@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
@@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
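/*
 * Illustrative sketch, not part of this patch: the annotation pattern the
 * sysctl conversions above follow.  struct example_cfg and example_can_bind()
 * are hypothetical.  A reader running without the relevant lock uses
 * READ_ONCE() so the compiler cannot tear or refetch the load while the
 * value is changed concurrently through /proc/sys.
 */
struct example_cfg {
        int nonlocal_bind;      /* updated from a sysctl handler */
};

static bool example_can_bind(const struct example_cfg *cfg, bool freebind)
{
        return READ_ONCE(cfg->nonlocal_bind) || freebind;
}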
diff --git a/include/net/ip.h b/include/net/ip.h
index 26fffda78cca..1c979fd1904c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
@@ -384,7 +384,7 @@ void ipfrag_init(void);
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
@@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
struct net *net = dev_net(dst->dev);
unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
mtu = rt->rt_pmtu;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index f51c06ae365f..6aef8cb11cc8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,8 +35,6 @@
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
@@ -52,8 +50,6 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/route.h b/include/net/route.h
index 991a3985712d..bbcf2aba149f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
- hoplimit = net->ipv4.sysctl_ip_default_ttl;
+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
return hoplimit;
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 9fa54762e077..7a48991cdb19 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e99f5c61f84..78a64e1b33a7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
#endif
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
diff --git a/include/net/udp.h b/include/net/udp.h
index b83a00330566..8dd4aa1485a6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index 32088c603244..bad21222130e 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -49,38 +49,52 @@
/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
TRACE_EVENT(dlm_lock_start,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
- __u32 flags),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+ unsigned int namelen, int mode, __u32 flags),
- TP_ARGS(ls, lkb, mode, flags),
+ TP_ARGS(ls, lkb, name, namelen, mode, flags),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__field(__u32, flags)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
+ else if (name)
+ memcpy(__get_dynamic_array(res_name), name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
+ TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
- show_lock_flags(__entry->flags))
+ show_lock_flags(__entry->flags),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_lock_end,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
- int error),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+ unsigned int namelen, int mode, __u32 flags, int error),
- TP_ARGS(ls, lkb, mode, flags, error),
+ TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
TP_STRUCT__entry(
__field(__u32, ls_id)
@@ -88,14 +102,26 @@ TRACE_EVENT(dlm_lock_end,
__field(int, mode)
__field(__u32, flags)
__field(int, error)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
+ else if (name)
+ memcpy(__get_dynamic_array(res_name), name,
+ __get_dynamic_array_len(res_name));
+
/* return value will be zeroed in those cases by dlm_lock()
* we do it here again to not introduce more overhead if
* trace isn't running and error reflects the return value.
@@ -104,12 +130,15 @@ TRACE_EVENT(dlm_lock_end,
__entry->error = 0;
else
__entry->error = error;
+
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
+ TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
- show_lock_flags(__entry->flags), __entry->error)
+ show_lock_flags(__entry->flags), __entry->error,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -123,42 +152,65 @@ TRACE_EVENT(dlm_bast,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
- __entry->lkb_id, show_lock_mode(__entry->mode))
+ TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s",
+ __entry->ls_id, __entry->lkb_id,
+ show_lock_mode(__entry->mode),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_ast,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb),
- TP_ARGS(ls, lkb, lksb),
+ TP_ARGS(ls, lkb),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(u8, sb_flags)
__field(int, sb_status)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
- __entry->sb_flags = lksb->sb_flags;
- __entry->sb_status = lksb->sb_status;
+ __entry->sb_flags = lkb->lkb_lksb->sb_flags;
+ __entry->sb_status = lkb->lkb_lksb->sb_status;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
+ TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
+ show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -173,17 +225,28 @@ TRACE_EVENT(dlm_unlock_start,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(__u32, flags)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x flags=%s",
+ TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_lock_flags(__entry->flags))
+ show_lock_flags(__entry->flags),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -199,18 +262,29 @@ TRACE_EVENT(dlm_unlock_end,
__field(__u32, lkb_id)
__field(__u32, flags)
__field(int, error)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
__entry->error = error;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
+ TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_lock_flags(__entry->flags), __entry->error)
+ show_lock_flags(__entry->flags), __entry->error,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
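/*
 * Illustrative sketch, not part of this patch: the capture rule that each
 * TP_fast_assign() block above implements.  example_capture_res_name() is
 * hypothetical; it prefers the resource name attached to the lkb and falls
 * back to the caller-supplied name, recording nothing otherwise.
 */
static void example_capture_res_name(unsigned char *dst, size_t dst_len,
                                     const struct dlm_rsb *r, const void *name)
{
        if (r)
                memcpy(dst, r->res_name, dst_len);
        else if (name)
                memcpy(dst, name, dst_len);
}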
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index f13d37b60775..1ecdb911add8 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -192,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -216,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index f1f89132d60e..d8536d77fea1 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -83,12 +83,20 @@
#define FAN_MARK_FLUSH 0x00000080
/* FAN_MARK_FILESYSTEM is 0x00000100 */
#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
/* These are NOT bitwise flags. Both bits can be used together. */
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
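/*
 * Illustrative sketch, not part of this patch: userspace adding an ignore
 * mark on a mount with the new convenience macro.  The path, the FAN_OPEN
 * mask and example_ignore_opens_on_mount() are examples only, and the libc
 * <sys/fanotify.h> header may not define FAN_MARK_IGNORE_SURV yet.
 */
#include <sys/fanotify.h>
#include <fcntl.h>

static int example_ignore_opens_on_mount(int fan_fd)
{
        /* Mount marks must pair FAN_MARK_IGNORE with ..._SURV_MODIFY. */
        return fanotify_mark(fan_fd,
                             FAN_MARK_ADD | FAN_MARK_MOUNT | FAN_MARK_IGNORE_SURV,
                             FAN_OPEN, AT_FDCWD, "/mnt/data");
}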
/* Deprecated - do not use this in programs and do not add new flags here! */
#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
FAN_MARK_REMOVE |\
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 811897dadcae..860f867c50c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2084,7 +2084,7 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/kernel/configs/x86_debug.config b/kernel/configs/x86_debug.config
index dcd86f32f4ed..6fac5b405334 100644
--- a/kernel/configs/x86_debug.config
+++ b/kernel/configs/x86_debug.config
@@ -7,12 +7,11 @@ CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_SLUB_DEBUG_ON=y
-CONFIG_KMEMCHECK=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
CONFIG_GCOV_KERNEL=y
CONFIG_LOCKDEP=y
CONFIG_PROVE_LOCKING=y
CONFIG_SCHEDSTATS=y
-CONFIG_VMLINUX_VALIDATION=y
+CONFIG_NOINSTR_VALIDATION=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 9d1db4a54d34..65f0262f635e 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -335,8 +335,6 @@ struct rwsem_waiter {
struct task_struct *task;
enum rwsem_waiter_type type;
unsigned long timeout;
-
- /* Writer only, not initialized in reader */
bool handoff_set;
};
#define rwsem_first_waiter(sem) \
@@ -459,10 +457,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* to give up the lock), request a HANDOFF to
* force the issue.
*/
- if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
- time_after(jiffies, waiter->timeout)) {
- adjustment -= RWSEM_FLAG_HANDOFF;
- lockevent_inc(rwsem_rlock_handoff);
+ if (time_after(jiffies, waiter->timeout)) {
+ if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
+ adjustment -= RWSEM_FLAG_HANDOFF;
+ lockevent_inc(rwsem_rlock_handoff);
+ }
+ waiter->handoff_set = true;
}
atomic_long_add(-adjustment, &sem->count);
@@ -599,7 +599,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
- bool first = rwsem_first_waiter(sem) == waiter;
+ struct rwsem_waiter *first = rwsem_first_waiter(sem);
long count, new;
lockdep_assert_held(&sem->wait_lock);
@@ -609,11 +609,20 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
if (has_handoff) {
- if (!first)
+ /*
+ * Honor handoff bit and yield only when the first
+ * waiter is the one that set it. Otherwise, we
+ * still try to acquire the rwsem.
+ */
+ if (first->handoff_set && (waiter != first))
return false;
- /* First waiter inherits a previously set handoff bit */
- waiter->handoff_set = true;
+ /*
+ * First waiter can inherit a previously set handoff
+ * bit and spin on rwsem if lock acquisition fails.
+ */
+ if (waiter == first)
+ waiter->handoff_set = true;
}
new = count;
@@ -1027,6 +1036,7 @@ queue:
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+ waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 50ba70f019de..1c304fec89c0 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
return sum;
}
-#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
-#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
-#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited(). We spin for a fixed time period
+ * (defined below, boot time configurable) to allow SRCU readers to exit
+ * their read-side critical sections. If there are still some readers
+ * after one jiffy, we repeatedly block for one jiffy time periods.
+ * The blocking time is increased as the grace-period age increases,
+ * with max blocking time capped at 10 jiffies.
+ */
+#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5
+
+static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
+module_param(srcu_retry_check_delay, ulong, 0444);
+
+#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
+
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
+ // no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
+ // no-delay instances.
+
+#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low))
+#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high))
+#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
+// per-GP-phase no-delay instances adjusted to allow non-sleeping poll up to
+// one jiffy. Multiplying by 2 factors in the srcu_get_delay() call made
+// from process_srcu().
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \
+ (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
+
+// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
+ SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
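/*
 * Worked example, not part of this patch: with the default 5us retry delay,
 * the adjusted per-phase budget is 2 * USEC_PER_SEC / HZ / 5, i.e. roughly
 * one jiffy worth of 5us polls counted twice for srcu_get_delay():
 *   HZ=1000: 2 * 1000000 / 1000 / 5 =  400 -> clamped to [3, 1000] =  400
 *   HZ=250:  2 * 1000000 /  250 / 5 = 1600 -> clamped to [3, 1000] = 1000
 *   HZ=100:  2 * 1000000 /  100 / 5 = 4000 -> clamped to [3, 1000] = 1000
 */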
+
+static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
+module_param(srcu_max_nodelay_phase, ulong, 0444);
+
+// Maximum consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
+
+static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
+module_param(srcu_max_nodelay, ulong, 0444);
/*
* Return grace-period delay, zero if there are expedited grace
@@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
+ unsigned long gpstart;
+ unsigned long j;
unsigned long jbase = SRCU_INTERVAL;
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
jbase = 0;
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
- jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
- if (!jbase) {
- WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
- jbase = 1;
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+ j = jiffies - 1;
+ gpstart = READ_ONCE(ssp->srcu_gp_start);
+ if (time_after(j, gpstart))
+ jbase += j - gpstart;
+ if (!jbase) {
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
+ jbase = 1;
+ }
}
return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
@@ -607,15 +655,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited(). We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections. If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
- */
-#define SRCU_RETRY_CHECK_DELAY 5
-
-/*
* Start an SRCU grace period.
*/
static void srcu_gp_start(struct srcu_struct *ssp)
@@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
*/
static void srcu_gp_end(struct srcu_struct *ssp)
{
- unsigned long cbdelay;
+ unsigned long cbdelay = 1;
bool cbs;
bool last_lvl;
int cpu;
@@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- cbdelay = !!srcu_get_delay(ssp);
+ if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ cbdelay = 0;
+
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
+ unsigned long curdelay;
+
+ curdelay = !srcu_get_delay(ssp);
+
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
return true;
- if (--trycount + !srcu_get_delay(ssp) <= 0)
+ if ((--trycount + curdelay) <= 0)
return false;
- udelay(SRCU_RETRY_CHECK_DELAY);
+ udelay(srcu_retry_check_delay);
}
}
@@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work)
j = jiffies;
if (READ_ONCE(ssp->reschedule_jiffies) == j) {
WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
- if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+ if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
WRITE_ONCE(ssp->reschedule_count, 1);
@@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void)
pr_info("Hierarchical SRCU implementation.\n");
if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+ if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
+ pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
+ if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
+ pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
+ pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
return 0;
}
early_initcall(srcu_bootup_announce);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b5152961b743..7bf561262cb8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1701,7 +1701,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* the throttle.
*/
p->dl.dl_throttled = 0;
- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
+ if (!(flags & ENQUEUE_REPLENISH))
+ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+ task_pid_nr(p));
+
return;
}
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 230038d4f908..59ddb00d6944 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -34,6 +34,27 @@ MODULE_LICENSE("GPL");
#define WATCH_QUEUE_NOTE_SIZE 128
#define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)
+/*
+ * This must be called under the RCU read-lock, which makes
+ * sure that the wqueue still exists. It can then take the lock,
+ * and check that the wqueue hasn't been destroyed, which in
+ * turn makes sure that the notification pipe still exists.
+ */
+static inline bool lock_wqueue(struct watch_queue *wqueue)
+{
+ spin_lock_bh(&wqueue->lock);
+ if (unlikely(wqueue->defunct)) {
+ spin_unlock_bh(&wqueue->lock);
+ return false;
+ }
+ return true;
+}
+
+static inline void unlock_wqueue(struct watch_queue *wqueue)
+{
+ spin_unlock_bh(&wqueue->lock);
+}
+
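/*
 * Illustrative sketch, not part of this patch: the caller pattern the two
 * helpers above expect.  example_post() is hypothetical; the RCU read-side
 * section keeps *wqueue alive and lock_wqueue() rejects defunct queues
 * before anything is posted (see __post_watch_notification() below for the
 * real caller).
 */
static void example_post(struct watch_queue *wqueue,
                         struct watch_notification *n)
{
        rcu_read_lock();
        if (lock_wqueue(wqueue)) {
                post_one_notification(wqueue, n);
                unlock_wqueue(wqueue);
        }
        rcu_read_unlock();
}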
static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {
/*
* Post a notification to a watch queue.
+ *
+ * Must be called with the RCU lock for reading, and the
+ * watch_queue lock held, which guarantees that the pipe
+ * hasn't been released.
*/
static bool post_one_notification(struct watch_queue *wqueue,
struct watch_notification *n)
@@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
spin_lock_irq(&pipe->rd_wait.lock);
- if (wqueue->defunct)
- goto out;
-
mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;
@@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist,
if (security_post_notification(watch->cred, cred, n) < 0)
continue;
- post_one_notification(wqueue, n);
+ if (lock_wqueue(wqueue)) {
+ post_one_notification(wqueue, n);
+ unlock_wqueue(wqueue);
+ }
}
rcu_read_unlock();
@@ -429,6 +454,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
rcu_assign_pointer(watch->queue, wqueue);
}
+static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
+{
+ const struct cred *cred;
+ struct watch *w;
+
+ hlist_for_each_entry(w, &wlist->watchers, list_node) {
+ struct watch_queue *wq = rcu_access_pointer(w->queue);
+ if (wqueue == wq && watch->id == w->id)
+ return -EBUSY;
+ }
+
+ cred = current_cred();
+ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
+ atomic_dec(&cred->user->nr_watches);
+ return -EAGAIN;
+ }
+
+ watch->cred = get_cred(cred);
+ rcu_assign_pointer(watch->watch_list, wlist);
+
+ kref_get(&wqueue->usage);
+ kref_get(&watch->usage);
+ hlist_add_head(&watch->queue_node, &wqueue->watches);
+ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+ return 0;
+}
+
/**
* add_watch_to_object - Add a watch on an object to a watch list
* @watch: The watch to add
@@ -443,33 +495,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
*/
int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
{
- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
- struct watch *w;
-
- hlist_for_each_entry(w, &wlist->watchers, list_node) {
- struct watch_queue *wq = rcu_access_pointer(w->queue);
- if (wqueue == wq && watch->id == w->id)
- return -EBUSY;
- }
+ struct watch_queue *wqueue;
+ int ret = -ENOENT;
- watch->cred = get_current_cred();
- rcu_assign_pointer(watch->watch_list, wlist);
+ rcu_read_lock();
- if (atomic_inc_return(&watch->cred->user->nr_watches) >
- task_rlimit(current, RLIMIT_NOFILE)) {
- atomic_dec(&watch->cred->user->nr_watches);
- put_cred(watch->cred);
- return -EAGAIN;
+ wqueue = rcu_access_pointer(watch->queue);
+ if (lock_wqueue(wqueue)) {
+ spin_lock(&wlist->lock);
+ ret = add_one_watch(watch, wlist, wqueue);
+ spin_unlock(&wlist->lock);
+ unlock_wqueue(wqueue);
}
- spin_lock_bh(&wqueue->lock);
- kref_get(&wqueue->usage);
- kref_get(&watch->usage);
- hlist_add_head(&watch->queue_node, &wqueue->watches);
- spin_unlock_bh(&wqueue->lock);
-
- hlist_add_head(&watch->list_node, &wlist->watchers);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(add_watch_to_object);
@@ -520,20 +560,15 @@ found:
wqueue = rcu_dereference(watch->queue);
- /* We don't need the watch list lock for the next bit as RCU is
- * protecting *wqueue from deallocation.
- */
- if (wqueue) {
+ if (lock_wqueue(wqueue)) {
post_one_notification(wqueue, &n.watch);
- spin_lock_bh(&wqueue->lock);
-
if (!hlist_unhashed(&watch->queue_node)) {
hlist_del_init_rcu(&watch->queue_node);
put_watch(watch);
}
- spin_unlock_bh(&wqueue->lock);
+ unlock_wqueue(wqueue);
}
if (wlist->release_watch) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..aa8a82bc6738 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5001,7 +5001,10 @@ static void unbind_workers(int cpu)
for_each_pool_worker(worker, pool) {
kthread_set_per_cpu(worker->task, -1);
- WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ else
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
}
mutex_unlock(&wq_pool_attach_mutex);
diff --git a/mm/gup.c b/mm/gup.c
index 551264407624..e2a39e30756d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -87,7 +87,8 @@ retry:
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
goto retry;
}
@@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
refs *= GUP_PIN_COUNTING_BIAS;
}
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
}
/**
diff --git a/mm/hmm.c b/mm/hmm.c
index 3fd3242c5e50..f2aa63b94d9b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
- swp_entry_t entry)
-{
- return is_device_private_entry(entry) &&
- pfn_swap_entry_to_page(entry)->pgmap->owner ==
- range->dev_private_owner;
-}
-
static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
pte_t pte)
{
@@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
swp_entry_t entry = pte_to_swp_entry(pte);
/*
- * Never fault in device private pages, but just report
- * the PFN even if not present.
+ * Don't fault in device private pages owned by the caller,
+ * just report the PFN.
*/
- if (hmm_is_device_private_entry(range, entry)) {
+ if (is_device_private_entry(entry) &&
+ pfn_swap_entry_to_page(entry)->pgmap->owner ==
+ range->dev_private_owner) {
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
@@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
if (!non_swap_entry(entry))
goto fault;
+ if (is_device_private_entry(entry))
+ goto fault;
+
if (is_device_exclusive_entry(entry))
goto fault;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a57e1be41401..a18c071c294e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4788,8 +4788,13 @@ again:
* sharing with another vma.
*/
;
- } else if (unlikely(is_hugetlb_entry_migration(entry) ||
- is_hugetlb_entry_hwpoisoned(entry))) {
+ } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
+ bool uffd_wp = huge_pte_uffd_wp(entry);
+
+ if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ entry = huge_pte_clear_uffd_wp(entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ } else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = huge_pte_uffd_wp(entry);
@@ -5947,6 +5952,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page)) {
+ put_page(*pagep);
ret = -ENOMEM;
*pagep = NULL;
goto out;
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 4b5e5a3d3a63..6aff49f6b79e 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -603,14 +603,6 @@ static unsigned long kfence_init_pool(void)
addr += 2 * PAGE_SIZE;
}
- /*
- * The pool is live and will never be deallocated from this point on.
- * Remove the pool object from the kmemleak object tree, as it would
- * otherwise overlap with allocations returned by kfence_alloc(), which
- * are registered with kmemleak through the slab post-alloc hook.
- */
- kmemleak_free(__kfence_pool);
-
return 0;
}
@@ -623,8 +615,16 @@ static bool __init kfence_init_pool_early(void)
addr = kfence_init_pool();
- if (!addr)
+ if (!addr) {
+ /*
+ * The pool is live and will never be deallocated from this point on.
+ * Ignore the pool object from the kmemleak phys object tree, as it would
+ * otherwise overlap with allocations returned by kfence_alloc(), which
+ * are registered with kmemleak through the slab post-alloc hook.
+ */
+ kmemleak_ignore_phys(__pa(__kfence_pool));
return true;
+ }
/*
* Only release unprotected pages, and do not try to go back and change
diff --git a/mm/memory.c b/mm/memory.c
index 4cf7d4b6c950..1c6027adc542 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3043,7 +3043,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
pte_t entry;
VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE));
- VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page));
+ VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page));
/*
* Clear the pages cpupid information as the existing
@@ -4369,9 +4369,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return VM_FAULT_OOM;
}
- /* See comment in handle_pte_fault() */
+ /*
+ * See comment in handle_pte_fault() for how this scenario happens, we
+ * need to return NOPAGE so that we drop this page.
+ */
if (pmd_devmap_trans_unstable(vmf->pmd))
- return 0;
+ return VM_FAULT_NOPAGE;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
diff --git a/mm/memremap.c b/mm/memremap.c
index b870a659eee6..745eea0f99c3 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page)
}
#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs)
{
if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
@@ -509,9 +509,9 @@ bool __put_devmap_managed_page(struct page *page)
* refcount is 1, then the page is free and the refcount is
* stable because nobody holds a reference on the page.
*/
- if (page_ref_dec_return(page) == 1)
+ if (page_ref_sub_return(page, refs) == 1)
wake_up_var(&page->_refcount);
return true;
}
-EXPORT_SYMBOL(__put_devmap_managed_page);
+EXPORT_SYMBOL(__put_devmap_managed_page_refs);
#endif /* CONFIG_FS_DAX */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3df0485..b5b14b78c4fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3968,11 +3968,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
* need to be calculated.
*/
if (!order) {
- long fast_free;
+ long usable_free;
+ long reserved;
- fast_free = free_pages;
- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
+ usable_free = free_pages;
+ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
+
+ /* reserved may overestimate high-atomic reserves. */
+ usable_free -= min(usable_free, reserved);
+ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
return true;
}
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 206ed6b40c1d..f06279d6190a 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
gfp_t gfp = vmf->gfp_mask;
unsigned long addr;
struct page *page;
+ vm_fault_t ret;
int err;
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return vmf_error(-EINVAL);
+ filemap_invalidate_lock_shared(mapping);
+
retry:
page = find_lock_page(mapping, offset);
if (!page) {
page = alloc_page(gfp | __GFP_ZERO);
- if (!page)
- return VM_FAULT_OOM;
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
err = set_direct_map_invalid_noflush(page);
if (err) {
put_page(page);
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
__SetPageUptodate(page);
@@ -86,7 +92,8 @@ retry:
if (err == -EEXIST)
goto retry;
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
addr = (unsigned long)page_address(page);
@@ -94,7 +101,11 @@ retry:
}
vmf->page = page;
- return VM_FAULT_LOCKED;
+ ret = VM_FAULT_LOCKED;
+
+out:
+ filemap_invalidate_unlock_shared(mapping);
+ return ret;
}
static const struct vm_operations_struct secretmem_vm_ops = {
@@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
+ struct address_space *mapping = inode->i_mapping;
unsigned int ia_valid = iattr->ia_valid;
+ int ret;
+
+ filemap_invalidate_lock(mapping);
if ((ia_valid & ATTR_SIZE) && inode->i_size)
- return -EINVAL;
+ ret = -EINVAL;
+ else
+ ret = simple_setattr(mnt_userns, dentry, iattr);
- return simple_setattr(mnt_userns, dentry, iattr);
+ filemap_invalidate_unlock(mapping);
+
+ return ret;
}
static const struct inode_operations secretmem_iops = {
diff --git a/mm/shmem.c b/mm/shmem.c
index a6f565308133..b7f2d4a56867 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3392,7 +3392,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_nr_blocks:
ctx->blocks = memparse(param->string, &rest);
- if (*rest)
+ if (*rest || ctx->blocks > S64_MAX)
goto bad_value;
ctx->seen |= SHMEM_SEEN_BLOCKS;
break;
@@ -3514,10 +3514,7 @@ static int shmem_reconfigure(struct fs_context *fc)
raw_spin_lock(&sbinfo->stat_lock);
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
- if (ctx->blocks > S64_MAX) {
- err = "Number of blocks too large";
- goto out;
- }
+
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
err = "Cannot retroactively limit size";
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 1739e8cb3291..c17021642234 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4973,6 +4973,9 @@ int hci_suspend_sync(struct hci_dev *hdev)
return err;
}
+ /* Update event mask so only the allowed event can wakeup the host */
+ hci_set_event_mask_sync(hdev);
+
/* Only configure accept list if disconnect succeeded and wake
* isn't being prevented.
*/
@@ -4984,9 +4987,6 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Unpause to take care of updating scanning params */
hdev->scanning_paused = false;
- /* Update event mask so only the allowed event can wakeup the host */
- hci_set_event_mask_sync(hdev);
-
/* Enable event filter for paired devices */
hci_update_event_filter_sync(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ae78490ecd3d..52668662ae8d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a reference locked channel.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a reference locked channel.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
@@ -1968,7 +1991,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
+ if (!c)
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
@@ -1983,7 +2009,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
@@ -4463,6 +4489,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4577,6 +4604,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -5304,6 +5332,7 @@ send_move_response:
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5396,6 +5425,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
@@ -5425,6 +5455,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
@@ -5488,6 +5519,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5523,6 +5555,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5895,12 +5928,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
@@ -5911,7 +5943,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -7597,6 +7631,7 @@ drop:
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
@@ -8085,7 +8120,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ae758ab1b558..2f91a8c2b678 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4723,7 +4723,6 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
else
status = MGMT_STATUS_FAILED;
- mgmt_pending_remove(cmd);
goto unlock;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index bb01776d2d88..c96509c442a5 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -589,9 +589,13 @@ static int br_fill_ifinfo(struct sk_buff *skb,
}
done:
+ if (af) {
+ if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0))
+ nla_nest_end(skb, af);
+ else
+ nla_nest_cancel(skb, af);
+ }
- if (af)
- nla_nest_end(skb, af);
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 251e666ba9a2..748be7253248 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -47,7 +47,7 @@ enum caif_states {
struct caifsock {
struct sock sk; /* must be first member */
struct cflayer layer;
- u32 flow_state;
+ unsigned long flow_state;
struct caif_connect_request conn_req;
struct mutex readlock;
struct dentry *debugfs_socket_dir;
@@ -56,38 +56,32 @@ struct caifsock {
static int rx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static int tx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_on(struct caifsock *cf_sk)
{
- set_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_on(struct caifsock *cf_sk)
{
- set_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void caif_read_lock(struct sock *sk)
diff --git a/net/core/filter.c b/net/core/filter.c
index 2a6a0b0ce43e..7950f7520765 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7041,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
@@ -7116,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 5f85e01d4093..b0ff6153be62 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f00a28fe762..5daa1fa54249 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
@@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
goto select_by_hash;
goto failure;
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index dc92a67baea3..7d542eb46172 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
+ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
/* Initialization of DECnet Session Control Port */
scp = DN_SK(sk);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 3738f2d40a0b..2dd76eb1621c 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
bool vlan_filtering;
int err;
@@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
* VLAN-aware bridge.
*/
if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
+ dsa_switch_for_each_port(other_dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(other_dp);
if (br && br_vlan_enabled(br)) {
change_vlan_filtering = false;
@@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *slave = other_dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 2b56218fc57c..4dfd68cf61c5 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -344,6 +344,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
ether_addr_copy(a->addr, addr);
a->vid = vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &lag->fdbs);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ac67f6b4ec70..252c8bceaba4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -335,7 +335,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 6eea1e9e998d..f8ad04470d3a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("%s: %s digestsize %u != %hu\n",
+ pr_info("%s: %s digestsize %u != %u\n",
__func__, x->aalg->alg_name,
crypto_ahash_digestsize(ahash),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b21238df3301..b694f352ce7a 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x)
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %hu\n",
+ pr_info("ESP: %s digestsize %u != %u\n",
x->aalg->alg_name,
crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d9fdcbae16ee..db7b2503f068 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 46e8a5125853..452ff177e4da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
+ u8 fib_notify_on_flag_change;
struct fib_alias *fa_match;
struct sk_buff *skb;
int err;
@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
WRITE_ONCE(fa_match->offload, fri->offload);
WRITE_ONCE(fa_match->trap, fri->trap);
+ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
+
/* 2 means send notifications only if offload_failed was changed. */
- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+ if (fib_notify_on_flag_change == 2 &&
READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ if (!fib_notify_on_flag_change)
goto out;
skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 57c4f0d87a7a..d5d745c3e345 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -881,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b65d074d9620..e3ab0cb61624 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 53f5f956d948..eb31c7158b39 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -263,7 +263,7 @@ next_port:
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
@@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e3aa436a1bdf..e18931a6d153 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb)
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1165f717cd1..1b512390b3cf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
int err, drop_reason;
struct rtable *rt;
@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 445a9ecaefa1..a8a323ecbb54 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
@@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist_flex);
@@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
@@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- net->ipv4.sysctl_ip_default_ttl :
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 918c61fda0f3..d640adcaf1b1 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
@@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 28836071f0a6..0088a4c64d77 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 356f535f3443..4702c61207a8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
const struct sk_buff *skb,
bool *p_has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
const struct sk_buff *skb,
bool has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
@@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
struct flow_keys hash_keys;
u32 mhash = 0;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b387c4835155..942d2dfa1115 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 108fd86f2718..5490c285668b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
@@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
@@ -350,61 +350,6 @@ bad_key:
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
@@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "nexthop_compat_mode",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2222dfdde316..766881775abb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
sk_sockets_allocated_inc(sk);
}
@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
tcp_skb_can_collapse_to(skb);
@@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1723,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
val = min(val, cap);
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
@@ -3617,7 +3618,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3967,12 +3969,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -4456,9 +4459,18 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
- /* check the signature */
- genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected,
- NULL, skb);
+ /* Check the signature.
+ * To support dual stack listeners, we need to handle
+ * IPv4-mapped case.
+ */
+ if (family == AF_INET)
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+ else
+ genhash = tp->af_specific->calc_md5_hash(newhash,
+ hash_expected,
+ NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08e..825b216d11f5 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
@@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ec4edc37313..b1637990d570 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
if (sk->sk_sndbuf < sndmem)
WRITE_ONCE(sk->sk_sndbuf,
- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
*/
static void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
+ int rmem2;
icsk->icsk_ack.quick = 0;
+ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < rmem2 &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
WRITE_ONCE(sk->sk_rcvbuf,
- min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]));
+ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -910,9 +911,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
* end of slow start and should slow down.
*/
if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
else
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
@@ -1051,7 +1052,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2031,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2095,7 +2096,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -2139,6 +2141,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -2159,10 +2162,12 @@ void tcp_enter_loss(struct sock *sk)
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2171,7 +2176,7 @@ void tcp_enter_loss(struct sock *sk)
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -3054,7 +3059,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3464,7 +3469,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -3576,7 +3582,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3624,7 +3631,7 @@ static void tcp_send_challenge_ack(struct sock *sk)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
@@ -4056,7 +4063,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4072,7 +4079,7 @@ void tcp_parse_options(const struct net *net,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4080,7 +4087,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@@ -4421,7 +4428,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4468,7 +4475,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
@@ -5514,7 +5521,7 @@ send_now:
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5535,11 +5542,12 @@ send_now:
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
@@ -5567,7 +5575,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -6797,11 +6805,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6809,8 +6820,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
@@ -6859,7 +6869,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
@@ -6893,13 +6903,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and the peer is
* evidently a real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
@@ -6948,10 +6960,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause if you dislike this approach. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
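
The hunks above all apply one idiom: TCP sysctl knobs that an administrator can flip at any time through /proc/sys are now loaded with READ_ONCE(), typically paired with an annotated store such as WRITE_ONCE() on the update side, so the compiler can neither tear the access nor re-read the knob and observe two different values within one function. Where a knob is consulted more than once (sysctl_tcp_syncookies, sysctl_tcp_reordering above), it is also read once into a local so every check uses the same snapshot. A minimal sketch of the idiom; struct my_net and sysctl_foo are made-up names standing in for the kernel's real structures:

/* Writer (e.g. the proc handler) publishes the new value with WRITE_ONCE(). */
static void set_foo(struct my_net *net, int val)
{
	WRITE_ONCE(net->sysctl_foo, val);
}

/* Reader takes one snapshot and uses it for every decision in the function,
 * so a concurrent write cannot make two checks of the same knob disagree.
 */
static bool foo_allows(const struct my_net *net, int level)
{
	int foo = READ_ONCE(net->sysctl_foo);

	return level < foo;
}
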
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5a3c44c4fb..586c102ce152 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -1006,7 +1006,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(inet_sk(sk)->tos & INET_ECN_MASK) :
inet_sk(sk)->tos;
@@ -1526,7 +1526,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
if (!dst) {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7029b0e98edb..d58e672be31c 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tcp_snd_cwnd(tp) >> 1) > val)
@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
@@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
@@ -462,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6854bb1fb32b..cb95d88497ae 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -781,7 +781,7 @@ listen_overflow:
if (sk != req->rsk_listener)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 11aa0ab10bba..4c376b6d8764 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- /* If this is the first data packet sent in response to the
- * previous received data,
- * and it is a reply for ato after last received packet,
- * increase pingpong count.
- */
- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_inc_pingpong_cnt(sk);
-
tp->lsndtime = now;
+
+ /* If it is a reply for ato after last received
+ * packet, enter pingpong mode.
+ */
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_enter_pingpong_mode(sk);
}
/* Account for an ACK we sent. */
@@ -230,7 +227,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = min_t(u32, space, U16_MAX);
@@ -241,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
*rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
@@ -285,7 +282,7 @@ static u16 tcp_select_window(struct sock *sk)
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
- sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -791,18 +788,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1719,7 +1716,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
@@ -1762,10 +1760,10 @@ void tcp_mtup_init(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1897,7 +1895,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1975,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
- r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
bytes += sk->sk_gso_max_size >> r;
@@ -1994,7 +1992,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2282,7 +2280,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2366,7 +2364,7 @@ static int tcp_mtu_probe(struct sock *sk)
* probing process by not resetting search range to its original.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elapsed for
* another round of probing.
*/
@@ -2506,7 +2504,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
@@ -2740,7 +2738,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -3104,7 +3102,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -3646,7 +3644,7 @@ static void tcp_connect_init(struct sock *sk)
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
@@ -3682,7 +3680,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
@@ -4089,7 +4087,7 @@ void tcp_send_probe0(struct sock *sk)
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 48f30e7209f2..50abaa941387 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 20cf4a98c69d..50bba370486e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -406,12 +407,15 @@ abort: tcp_write_err(sk);
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
+ int max_retries;
req->rsk_ops->syn_ack_timeout(req);
+ /* add one more retry for fastopen */
+ max_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
@@ -574,7 +578,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -585,7 +589,7 @@ out_reset_timer:
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70564ddccc46..6f354f8be2c5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -226,7 +226,7 @@ lookup_protocol:
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 0322cc86b84e..e1ebf5e42ebe 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -45,20 +45,23 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7f695c39d9a8..87c699d57b36 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_query_work, 0);
break;
}
}
@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work)
__mld_query_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
/* called with rcu_read_lock() */
@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_report_work, 0);
break;
}
}
@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
__mld_report_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
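
The mcast.c hunks change how the deferred MLD work keeps its reference on the inet6 device: instead of scheduling follow-up work from inside the drain loop, the handler requeues itself once at the end, and the in6_dev reference is dropped only when no requeue took place (queue_delayed_work() returns false when the work was already pending). A rough sketch of that pattern; my_obj, my_wq, process_batch() and obj_put() are placeholders, not kernel identifiers:

static void my_work_fn(struct work_struct *work)
{
	struct my_obj *obj = container_of(to_delayed_work(work),
					  struct my_obj, work);
	bool more = process_batch(obj);	/* true if queued items remain */

	/* If items remain and the requeue actually queued the work, the
	 * queued instance inherits the reference we currently hold.
	 */
	if (more && queue_delayed_work(my_wq, &obj->work, 0))
		return;

	/* Nothing left to do (or a run is already pending): drop our ref. */
	obj_put(obj);
}
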
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ecf3a553a0dc..8c6c2d82c1cd 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -22,6 +22,11 @@
#include <linux/proc_fs.h>
#include <net/ping.h>
+static void ping_v6_destroy(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
@@ -181,6 +186,7 @@ struct proto pingv6_prot = {
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .destroy = ping_v6_destroy,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9cc123f000fb..5014aa663452 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f37dd4aa91c6..be09941fe6d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
@@ -1314,7 +1314,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
@@ -1822,7 +1822,7 @@ do_time_wait:
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 55afd7f39c04..e2f2e087a753 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 15a73b7fdd75..1a9ada411879 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -377,9 +377,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
bool cancel_scan;
struct cfg80211_nan_func *func;
- spin_lock_bh(&local->fq.lock);
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
- spin_unlock_bh(&local->fq.lock);
+ synchronize_rcu(); /* flush _ieee80211_wake_txqs() */
cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata;
if (cancel_scan)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bd8f0f425be4..30d289044e71 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1271,7 +1271,7 @@ raise_win:
if (unlikely(th->syn))
new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
if (!tp->rx_opt.rcv_wscale &&
- sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 21a3ed64226e..7e1518bb6115 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1908,7 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -1926,7 +1926,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(rcvwin, advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
@@ -2669,8 +2669,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 63e8892ec807..af28f3b60389 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1533,7 +1533,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
- return err;
+ return 0;
failed_unlink:
list_del(&subflow->node);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index e479dd0561c5..16915f8eef2b 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 646d5fd53604..9f976b11d896 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3340,6 +3340,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
+
+ cond_resched();
}
return 0;
@@ -9367,9 +9369,13 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
break;
}
}
+
+ cond_resched();
}
list_for_each_entry(set, &ctx->table->sets, list) {
+ cond_resched();
+
if (!nft_is_active_next(ctx->net, set))
continue;
if (!(set->flags & NFT_SET_MAP) ||
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a364f8e5e698..87a9009d5234 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -843,11 +843,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
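
The nfnetlink_queue.c hunk widens data_len to unsigned and refuses to trim a mangled packet below its transport header, so a hostile verdict payload cannot truncate away headers that later code still dereferences. The same check-before-shrink pattern in isolation; shrink_payload() is an illustrative helper, not a kernel function:

/* Reject a caller-supplied length that would cut into the packet headers
 * before handing it to pskb_trim().
 */
static int shrink_payload(struct sk_buff *skb, unsigned int new_len)
{
	unsigned int min_len = skb_transport_offset(skb);

	if (new_len < min_len)
		return -EINVAL;

	return pskb_trim(skb, new_len);
}
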
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 15e4b7640dc0..da29e92c03e2 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -68,6 +68,31 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
regs->verdict.code = ret;
}
+static int nft_queue_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ case NFPROTO_BRIDGE:
+ break;
+ case NFPROTO_NETDEV: /* lacks okfn */
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
@@ -164,6 +189,7 @@ static const struct nft_expr_ops nft_queue_ops = {
.eval = nft_queue_eval,
.init = nft_queue_init,
.dump = nft_queue_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
@@ -173,6 +199,7 @@ static const struct nft_expr_ops nft_queue_sreg_ops = {
.eval = nft_queue_sreg_eval,
.init = nft_queue_sreg_init,
.dump = nft_queue_sreg_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9bb4d3dcc994..ac366c99086f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action,
struct tc_action *actions[],
struct netlink_ext_ack *extack)
{
- int i, j, index, err = 0;
+ int i, j, k, index, err = 0;
struct tc_action *act;
BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
@@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = tc_act_hw_stats(act->hw_stats);
- entry->hw_index = act->tcfa_index;
index = 0;
err = tc_setup_offload_act(act, entry, &index, extack);
- if (!err)
- j += index;
- else
+ if (err)
goto err_out_locked;
+
+ for (k = 0; k < index ; k++) {
+ entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+ entry[k].hw_index = act->tcfa_index;
+ }
+
+ j += index;
+
spin_unlock_bh(&act->tcfa_lock);
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index be29da09cc7a..3460abceba44 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
- 0, gfp))
- goto fail_init;
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
+ goto stream_free;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae33..1a094b087d88 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6dc95dcc0ff4..ef9fceadef8d 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out_err;
+ return ret;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
- goto out;
-
- ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret)
- goto in_err;
-
- goto out;
+ return 0;
-in_err:
- sched->free(stream);
- genradix_free(&stream->in);
-out_err:
- genradix_free(&stream->out);
- stream->outcnt = 0;
-out:
- return ret;
+ return sctp_stream_alloc_in(stream, incnt, gfp);
}
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 518b1b9bf89d..1ad565ed5627 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -160,7 +160,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
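
The stream_sched.c hunk switches the per-stream extension allocation from GFP_KERNEL to GFP_ATOMIC, presumably because this path can run in a context where sleeping is not allowed; GFP_KERNEL may block waiting for memory, while GFP_ATOMIC never sleeps. A generic sketch of threading the allocation mode down from the caller that knows its locking context; struct my_stream and ext_alloc() are illustrative names:

/* Let the caller pick the allocation mode: a caller holding a spinlock or
 * running with BHs disabled passes GFP_ATOMIC, a sleepable caller passes
 * GFP_KERNEL.
 */
static int ext_alloc(struct my_stream *stream, int i, gfp_t gfp)
{
	stream->ext[i] = kzalloc(sizeof(*stream->ext[i]), gfp);

	return stream->ext[i] ? 0 : -ENOMEM;
}
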
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b2941d..0bde36b56472 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
/* called after lgr was removed from lgr_list */
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 43509c7e90fc..f1c3b8eb4b3d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index ce827e79c66a..9975df34d9c2 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
@@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk)
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
@@ -1374,8 +1376,13 @@ static int tls_device_down(struct net_device *netdev)
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
* Now release the ref taken above.
*/
- if (refcount_dec_and_test(&ctx->refcount))
+ if (refcount_dec_and_test(&ctx->refcount)) {
+ /* sk_destruct ran after tls_device_down took a ref, and
+ * it returned early. Complete the destruction here.
+ */
+ list_del(&ctx->list);
tls_device_free_ctx(ctx);
+ }
}
up_write(&device_offload_lock);
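
The tls_device.c hunks move the refcount_dec_and_test() under tls_device_lock and make tls_device_down() finish the teardown when it observes the final reference drop, so the last put and the list manipulation are atomic with respect to each other. The underlying pattern, with placeholder names (my_obj, obj_list_lock, obj_gc_list):

/* Drop a reference and, if it was the last one, move the object to the
 * GC list in the same critical section, so list walkers never encounter
 * an entry whose refcount has already reached zero.
 */
static void obj_release(struct my_obj *obj)
{
	unsigned long flags;

	spin_lock_irqsave(&obj_list_lock, flags);
	if (refcount_dec_and_test(&obj->refcount))
		list_move_tail(&obj->list, &obj_gc_list);
	spin_unlock_irqrestore(&obj_list_lock, flags);
}
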
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1876ea61fdc..f1a0bab920a5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
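
The xfrm_policy.c hunks reset *num_pols on the error paths, keeping the out-parameter consistent with what the caller actually still holds, so error handling in the caller cannot drop references that were already released or never taken. The general shape of the fix, with illustrative names (collect_pols, my_pol_lookup):

/* On failure, report that the caller owns nothing; never leave a stale
 * count pointing at entries that were already put or never acquired.
 */
static int collect_pols(struct my_ctx *ctx, struct my_pol **pols, int *num_pols)
{
	*num_pols = 0;

	pols[0] = my_pol_lookup(ctx);
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_pols = 1;
	return 0;
}
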
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 08564e0eef20..ccfb172eb5b8 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 46f7542db08c..dc07b6d12e30 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -180,7 +180,7 @@ lx-symbols command."""
self.breakpoint.delete()
self.breakpoint = None
self.breakpoint = LoadModuleBreakpoint(
- "kernel/module.c:do_init_module", self)
+ "kernel/module/main.c:do_init_module", self)
else:
gdb.write("Note: symbol update on module loading not supported "
"with this gdb version\n")
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index cc88f02c7562..93e8bc047a73 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -755,13 +755,14 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
evm_update_evmxattr(dentry, xattr_name, NULL, 0);
}
-static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
+static int evm_attr_change(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_backing_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
- if ((!(ia_valid & ATTR_UID) || uid_eq(attr->ia_uid, inode->i_uid)) &&
- (!(ia_valid & ATTR_GID) || gid_eq(attr->ia_gid, inode->i_gid)) &&
+ if (!i_uid_needs_update(mnt_userns, attr, inode) &&
+ !i_gid_needs_update(mnt_userns, attr, inode) &&
(!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode))
return 0;
@@ -775,7 +776,8 @@ static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
* Permit update of file attributes when files have a valid EVM signature,
* except in the case of them having an immutable portable signature.
*/
-int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
+int evm_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
enum integrity_status evm_status;
@@ -801,7 +803,7 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
- !evm_attr_change(dentry, attr))
+ !evm_attr_change(mnt_userns, dentry, attr))
return 0;
integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 73917413365b..a8802b8da946 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -2247,6 +2247,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
if (id >= READING_MAX_ID)
return false;
+ if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+ && security_locked_down(LOCKDOWN_KEXEC))
+ return false;
+
func = read_idmap[id] ?: FILE_CHECK;
rcu_read_lock();
diff --git a/security/security.c b/security/security.c
index 188b8f782220..f85afb02ea1c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1324,7 +1324,8 @@ int security_inode_permission(struct inode *inode, int mask)
return call_int_hook(inode_permission, 0, inode, mask);
}
-int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
+int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
int ret;
@@ -1333,7 +1334,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
ret = call_int_hook(inode_setattr, 0, dentry, attr);
if (ret)
return ret;
- return evm_inode_setattr(dentry, attr);
+ return evm_inode_setattr(mnt_userns, dentry, attr);
}
EXPORT_SYMBOL_GPL(security_inode_setattr);
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index 99a128a666fb..4ce5d2579387 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -13,7 +13,6 @@
#include <linux/of_gpio.h>
#include <linux/of_device.h>
#include <linux/clk.h>
-#include <linux/pinctrl/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/spinlock.h>
@@ -55,40 +54,8 @@ struct rk_i2s_dev {
const struct rk_i2s_pins *pins;
unsigned int bclk_ratio;
spinlock_t lock; /* tx/rx lock */
- struct pinctrl *pinctrl;
- struct pinctrl_state *bclk_on;
- struct pinctrl_state *bclk_off;
};
-static int i2s_pinctrl_select_bclk_on(struct rk_i2s_dev *i2s)
-{
- int ret = 0;
-
- if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_on))
- ret = pinctrl_select_state(i2s->pinctrl,
- i2s->bclk_on);
-
- if (ret)
- dev_err(i2s->dev, "bclk enable failed %d\n", ret);
-
- return ret;
-}
-
-static int i2s_pinctrl_select_bclk_off(struct rk_i2s_dev *i2s)
-{
-
- int ret = 0;
-
- if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_off))
- ret = pinctrl_select_state(i2s->pinctrl,
- i2s->bclk_off);
-
- if (ret)
- dev_err(i2s->dev, "bclk disable failed %d\n", ret);
-
- return ret;
-}
-
static int i2s_runtime_suspend(struct device *dev)
{
struct rk_i2s_dev *i2s = dev_get_drvdata(dev);
@@ -125,49 +92,38 @@ static inline struct rk_i2s_dev *to_info(struct snd_soc_dai *dai)
return snd_soc_dai_get_drvdata(dai);
}
-static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
+static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
{
unsigned int val = 0;
int retry = 10;
- int ret = 0;
spin_lock(&i2s->lock);
if (on) {
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
- I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
+ I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
- I2S_XFER_TXS_START | I2S_XFER_RXS_START,
- I2S_XFER_TXS_START | I2S_XFER_RXS_START);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_XFER,
+ I2S_XFER_TXS_START | I2S_XFER_RXS_START,
+ I2S_XFER_TXS_START | I2S_XFER_RXS_START);
i2s->tx_start = true;
} else {
i2s->tx_start = false;
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
- I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
+ I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE);
if (!i2s->rx_start) {
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
- I2S_XFER_TXS_START |
- I2S_XFER_RXS_START,
- I2S_XFER_TXS_STOP |
- I2S_XFER_RXS_STOP);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_XFER,
+ I2S_XFER_TXS_START |
+ I2S_XFER_RXS_START,
+ I2S_XFER_TXS_STOP |
+ I2S_XFER_RXS_STOP);
udelay(150);
- ret = regmap_update_bits(i2s->regmap, I2S_CLR,
- I2S_CLR_TXC | I2S_CLR_RXC,
- I2S_CLR_TXC | I2S_CLR_RXC);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_CLR,
+ I2S_CLR_TXC | I2S_CLR_RXC,
+ I2S_CLR_TXC | I2S_CLR_RXC);
regmap_read(i2s->regmap, I2S_CLR, &val);
@@ -182,57 +138,44 @@ static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
}
}
}
-end:
spin_unlock(&i2s->lock);
- if (ret < 0)
- dev_err(i2s->dev, "lrclk update failed\n");
-
- return ret;
}
-static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
+static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
{
unsigned int val = 0;
int retry = 10;
- int ret = 0;
spin_lock(&i2s->lock);
if (on) {
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE);
- if (ret < 0)
- goto end;
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
+ regmap_update_bits(i2s->regmap, I2S_XFER,
I2S_XFER_TXS_START | I2S_XFER_RXS_START,
I2S_XFER_TXS_START | I2S_XFER_RXS_START);
- if (ret < 0)
- goto end;
i2s->rx_start = true;
} else {
i2s->rx_start = false;
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_DISABLE);
- if (ret < 0)
- goto end;
if (!i2s->tx_start) {
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
+ regmap_update_bits(i2s->regmap, I2S_XFER,
I2S_XFER_TXS_START |
I2S_XFER_RXS_START,
I2S_XFER_TXS_STOP |
I2S_XFER_RXS_STOP);
- if (ret < 0)
- goto end;
+
udelay(150);
- ret = regmap_update_bits(i2s->regmap, I2S_CLR,
+ regmap_update_bits(i2s->regmap, I2S_CLR,
I2S_CLR_TXC | I2S_CLR_RXC,
I2S_CLR_TXC | I2S_CLR_RXC);
- if (ret < 0)
- goto end;
+
regmap_read(i2s->regmap, I2S_CLR, &val);
+
/* Should wait for clear operation to finish */
while (val) {
regmap_read(i2s->regmap, I2S_CLR, &val);
@@ -244,12 +187,7 @@ static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
}
}
}
-end:
spin_unlock(&i2s->lock);
- if (ret < 0)
- dev_err(i2s->dev, "lrclk update failed\n");
-
- return ret;
}
static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
@@ -487,26 +425,17 @@ static int rockchip_i2s_trigger(struct snd_pcm_substream *substream,
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
- ret = rockchip_snd_rxctrl(i2s, 1);
+ rockchip_snd_rxctrl(i2s, 1);
else
- ret = rockchip_snd_txctrl(i2s, 1);
- /* Do not turn on bclk if lrclk open fails. */
- if (ret < 0)
- return ret;
- i2s_pinctrl_select_bclk_on(i2s);
+ rockchip_snd_txctrl(i2s, 1);
break;
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
- if (!i2s->tx_start)
- i2s_pinctrl_select_bclk_off(i2s);
- ret = rockchip_snd_rxctrl(i2s, 0);
- } else {
- if (!i2s->rx_start)
- i2s_pinctrl_select_bclk_off(i2s);
- ret = rockchip_snd_txctrl(i2s, 0);
- }
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+ rockchip_snd_rxctrl(i2s, 0);
+ else
+ rockchip_snd_txctrl(i2s, 0);
break;
default:
ret = -EINVAL;
@@ -807,33 +736,6 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
}
i2s->bclk_ratio = 64;
- i2s->pinctrl = devm_pinctrl_get(&pdev->dev);
- if (IS_ERR(i2s->pinctrl))
- dev_err(&pdev->dev, "failed to find i2s pinctrl\n");
-
- i2s->bclk_on = pinctrl_lookup_state(i2s->pinctrl,
- "bclk_on");
- if (IS_ERR_OR_NULL(i2s->bclk_on))
- dev_err(&pdev->dev, "failed to find i2s default state\n");
- else
- dev_dbg(&pdev->dev, "find i2s bclk state\n");
-
- i2s->bclk_off = pinctrl_lookup_state(i2s->pinctrl,
- "bclk_off");
- if (IS_ERR_OR_NULL(i2s->bclk_off))
- dev_err(&pdev->dev, "failed to find i2s gpio state\n");
- else
- dev_dbg(&pdev->dev, "find i2s bclk_off state\n");
-
- i2s_pinctrl_select_bclk_off(i2s);
-
- i2s->playback_dma_data.addr = res->start + I2S_TXDR;
- i2s->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- i2s->playback_dma_data.maxburst = 4;
-
- i2s->capture_dma_data.addr = res->start + I2S_RXDR;
- i2s->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- i2s->capture_dma_data.maxburst = 4;
dev_set_drvdata(&pdev->dev, i2s);
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 00f5227c8459..a77b915d36a8 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -302,6 +302,7 @@
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
index 0197042b7dfb..1ecdb911add8 100644
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
@@ -90,7 +91,7 @@
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
@@ -115,11 +116,13 @@
#define F_GETSIG 11 /* for sockets. */
#endif
+#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
+#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
@@ -178,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
@@ -185,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -209,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 811897dadcae..860f867c50c0 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -2084,7 +2084,7 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 5f57d9829956..4339692a8d0b 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -61,7 +61,7 @@ def get_optional(perf_dict, field):
def get_offset(perf_dict, field):
if field in perf_dict:
- return f"+0x{perf_dict[field]:x}"
+ return "+%#x" % perf_dict[field]
return ""
def get_dso_file_path(dso_name, dso_build_id):
@@ -76,7 +76,7 @@ def get_dso_file_path(dso_name, dso_build_id):
else:
append = "/elf"
- dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append;
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
@@ -94,8 +94,8 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
- f"--start-address=0x{start_addr:x}",
- f"--stop-address=0x{stop_addr:x}" ]
+ "--start-address="+format(start_addr,"#x"),
+ "--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
@@ -109,12 +109,14 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
m = disasm_re.search(line)
if m is None:
continue
- print(f"\t{line}")
+ print("\t" + line)
def print_sample(sample):
- print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
- f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
- f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+ print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
+ "pid: %d tid: %d period: %d time: %d }" % \
+ (sample['cpu'], sample['addr'], sample['phys_addr'], \
+ sample['ip'], sample['pid'], sample['tid'], \
+ sample['period'], sample['time']))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
@@ -131,7 +133,7 @@ def common_start_str(comm, sample):
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+ return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns)
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
@@ -171,7 +173,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
glb_line_number = line_number
glb_source_file_name = source_file_name
- print(f"{start_str}{src_str}")
+ print(start_str, src_str)
def process_event(param_dict):
global cache_size
@@ -188,7 +190,7 @@ def process_event(param_dict):
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
- print(f"Event type: {name}")
+ print("Event type: %s" % name)
print_sample(sample)
# If cannot find dso so cannot dump assembler, bail out
@@ -197,7 +199,7 @@ def process_event(param_dict):
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
- print(f"Failed to find valid dso map for dso {dso}")
+ print("Failed to find valid dso map for dso %s" % dso)
return
if (name[0:12] == "instructions"):
@@ -244,15 +246,15 @@ def process_event(param_dict):
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
- print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
- print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
- print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
if (options.objdump_name != None):
@@ -267,6 +269,6 @@ def process_event(param_dict):
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
- print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea247..cdd6463a5b68 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -63,20 +63,16 @@ static struct hashmap *bpf_map_hash;
static struct bpf_perf_object *
bpf_perf_object__next(struct bpf_perf_object *prev)
{
- struct bpf_perf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_perf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ if (!prev) {
+ if (list_empty(&bpf_objects_list))
+ return NULL;
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
+ return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+ }
+ if (list_is_last(&prev->list, &bpf_objects_list))
return NULL;
- return next;
+ return list_next_entry(prev, list);
}
#define bpf_perf_object__for_each(perf_obj, tmp) \
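
The rewritten helper returns NULL up front for an empty list and when prev is the last node, instead of walking past the end and comparing against the list head afterwards. The same next()-style contract, reduced to plain C with a singly linked list (a sketch, not the perf code):

    #include <stdio.h>
    #include <stddef.h>

    struct node { int val; struct node *next; };

    /* NULL prev means "start from the head"; NULL return means "no more elements". */
    static struct node *node_next(struct node *head, struct node *prev)
    {
            if (!prev)
                    return head;        /* empty list: head is NULL anyway */
            return prev->next;          /* last node: next is NULL */
    }

    int main(void)
    {
            struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

            for (struct node *n = node_next(&a, NULL); n; n = node_next(&a, n))
                    printf("%d\n", n->val);
            return 0;
    }
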
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ecd377938eea..b3be5b1d9dbb 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
return NULL;
}
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+	/* No valid program header found */
+ return -1;
+}
+
static bool want_demangle(bool is_kernel_sym)
{
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
sym.st_value);
used_opd = true;
}
+
/*
* When loading symbols in a data mapping, ABS symbols (which
* has a value of SHN_ABS in its st_shndx) failed at
@@ -1227,6 +1255,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
gelf_getshdr(sec, &shdr);
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. the ".gnu.warning.*" sections are used by the linker to
+ * generate warnings for calls to deprecated functions; the symbols
+ * in those sections are never loaded into memory during execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
secstrs = secstrs_sym;
/*
@@ -1262,11 +1301,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(syms_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_warning("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ continue;
+ }
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
- (u64)sym.st_value, (u64)shdr.sh_addr,
- (u64)shdr.sh_offset);
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
}
demangled = demangle_sym(dso, kmodule, elf_name);
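
The adjustment now relocates the symbol using the PT_LOAD segment that contains it rather than the symbol's section, i.e. the virtual address is translated to a file offset via the segment's p_vaddr/p_offset pair. A self-contained illustration of that arithmetic (the numbers are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical PT_LOAD segment and symbol address. */
            uint64_t p_vaddr  = 0x400000;   /* segment virtual address */
            uint64_t p_offset = 0x1000;     /* segment file offset */
            uint64_t st_value = 0x401234;   /* symbol virtual address */

            /* Same correction as above: subtract the vaddr-to-offset bias. */
            uint64_t adjusted = st_value - (p_vaddr - p_offset);

            printf("adjusted value: %#llx\n",
                   (unsigned long long)adjusted);   /* prints 0x2234 */
            return 0;
    }
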
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 71b306602368..616ed4019655 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -3,6 +3,6 @@
TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
-CFLAGS += -O2 -g -Wall -I../../../../usr/include/
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES)
include ../lib.mk
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..2237d1aac801 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -82,8 +82,9 @@ static int next_cpu(int cpu)
return cpu;
}
-static void *migration_worker(void *ign)
+static void *migration_worker(void *__rseq_tid)
{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
cpu_set_t allowed_mask;
int r, i, cpu;
@@ -106,7 +107,7 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
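
Passing the main thread's tid through pthread_create()'s void * argument is what lets the worker call sched_setaffinity() on the vCPU thread instead of on itself (a tid of 0 means "calling thread"). A minimal standalone sketch of that pattern (not the selftest code; the worker only prints the tid it would act on):

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    static void *worker(void *arg)
    {
            pid_t target_tid = (pid_t)(unsigned long)arg;

            /* A real worker would pass target_tid to sched_setaffinity(). */
            printf("worker acting on tid %d\n", target_tid);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            pid_t tid = syscall(SYS_gettid);  /* older glibc lacks a gettid() wrapper */

            pthread_create(&t, NULL, worker, (void *)(unsigned long)tid);
            pthread_join(t, NULL);
            return 0;
    }
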
@@ -231,7 +232,8 @@ int main(int argc, char *argv[])
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
- pthread_create(&migration_thread, NULL, migration_worker, 0);
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)gettid());
for (i = 0; !done; i++) {
vcpu_run(vm, VCPU_ID);