summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/adi,adin.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ksz90x1.txt32
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ravb.txt1
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml2
-rw-r--r--Documentation/networking/devlink-trap.rst3
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/powerpc/include/asm/cputable.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h100
-rw-r--r--arch/powerpc/include/asm/reg.h9
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S6
-rw-r--r--arch/powerpc/kernel/dbell.c6
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c35
-rw-r--r--arch/powerpc/kernel/eeh.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c42
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S10
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c31
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c15
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c84
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c34
-rw-r--r--arch/powerpc/platforms/powernv/smp.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c163
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c72
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c6
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c6
-rw-r--r--arch/x86/purgatory/Makefile1
-rw-r--r--drivers/atm/he.c2
-rw-r--r--drivers/infiniband/core/addr.c2
-rw-r--r--drivers/iommu/amd_iommu.c229
-rw-r--r--drivers/iommu/amd_iommu_types.h4
-rw-r--r--drivers/isdn/mISDN/socket.c2
-rw-r--r--drivers/net/Kconfig2
-rw-r--r--drivers/net/arcnet/Kconfig26
-rw-r--r--drivers/net/arcnet/arcnet.c31
-rw-r--r--drivers/net/can/usb/Kconfig8
-rw-r--r--drivers/net/dsa/b53/b53_serdes.h4
-rw-r--r--drivers/net/dsa/lantiq_pce.h2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h2
-rw-r--r--drivers/net/dsa/qca8k.c3
-rw-r--r--drivers/net/dsa/sja1105/Kconfig1
-rw-r--r--drivers/net/ethernet/Kconfig11
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/allwinner/Kconfig10
-rw-r--r--drivers/net/ethernet/amazon/Kconfig1
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_eth_com.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_vec.c15
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c2
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/Kconfig2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c2
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c269
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h6
-rw-r--r--drivers/net/ethernet/marvell/skge.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/cls.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.c7
-rw-r--r--drivers/net/ethernet/netx-eth.c497
-rw-r--r--drivers/net/ethernet/ni/nixge.c2
-rw-r--r--drivers/net/ethernet/nxp/Kconfig8
-rw-r--r--drivers/net/ethernet/pensando/Kconfig4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_debugfs.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c3
-rw-r--r--drivers/net/ethernet/socionext/netsec.c2
-rw-r--r--drivers/net/ethernet/socionext/sni_ave.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c16
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c2
-rw-r--r--drivers/net/macsec.c1
-rw-r--r--drivers/net/phy/Kconfig6
-rw-r--r--drivers/net/phy/micrel.c3
-rw-r--r--drivers/net/phy/national.c9
-rw-r--r--drivers/net/ppp/ppp_generic.c2
-rw-r--r--drivers/net/tap.c2
-rw-r--r--drivers/net/usb/cdc_ncm.c6
-rw-r--r--drivers/net/usb/usbnet.c8
-rw-r--r--drivers/net/vrf.c3
-rw-r--r--drivers/net/wireless/ath/Kconfig2
-rw-r--r--drivers/net/wireless/ath/ar5523/Kconfig4
-rw-r--r--drivers/net/wireless/ath/ath6kl/Kconfig2
-rw-r--r--drivers/net/wireless/ath/ath9k/Kconfig2
-rw-r--r--drivers/net/wireless/ath/carl9170/Kconfig6
-rw-r--r--drivers/net/wireless/ath/wil6210/txrx.c2
-rw-r--r--drivers/net/wireless/atmel/Kconfig32
-rw-r--r--drivers/net/wireless/intel/ipw2x00/Kconfig116
-rw-r--r--drivers/net/wireless/intel/iwlegacy/Kconfig6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Kconfig6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tt.c9
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/mcu.c11
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h6
-rw-r--r--drivers/net/wireless/ralink/rt2x00/Kconfig24
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac.c3
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.c4
-rw-r--r--drivers/net/wireless/realtek/rtw88/pci.c48
-rw-r--r--drivers/net/wireless/zydas/zd1211rw/zd_usb.c2
-rw-r--r--drivers/nfc/st95hf/core.c2
-rw-r--r--drivers/of/of_mdio.c2
-rw-r--r--drivers/ptp/ptp_chardev.c4
-rw-r--r--fs/exec.c2
-rw-r--r--include/linux/gfp.h12
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h28
-rw-r--r--include/linux/mm_types.h14
-rw-r--r--include/linux/platform_data/eth-netx.h13
-rw-r--r--include/linux/rcuwait.h20
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/sched/mm.h10
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/skbuff.h9
-rw-r--r--include/net/inet_timewait_sock.h1
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/netfilter/nf_tables.h6
-rw-r--r--include/net/route.h3
-rw-r--r--include/net/sch_generic.h5
-rw-r--r--include/uapi/linux/btf.h4
-rw-r--r--include/uapi/linux/netfilter_bridge/ebtables.h6
-rw-r--r--include/uapi/linux/ptp_clock.h22
-rw-r--r--kernel/bpf/btf.c7
-rw-r--r--kernel/bpf/xskmap.c2
-rw-r--r--kernel/exit.c74
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/sched/core.c28
-rw-r--r--kernel/sched/fair.c39
-rw-r--r--kernel/sched/membarrier.c239
-rw-r--r--kernel/sched/sched.h34
-rw-r--r--kernel/trace/bpf_trace.c26
-rw-r--r--lib/Kconfig5
-rw-r--r--mm/huge_memory.c51
-rw-r--r--mm/mempolicy.c45
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/shmem.c2
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/Kconfig10
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/dst.c4
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/dccp/ipv6.c5
-rw-r--r--net/ieee802154/socket.c3
-rw-r--r--net/ife/Kconfig2
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/route.c36
-rw-r--r--net/ipv4/tcp_bbr.c8
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/tcp_timer.c5
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/fib6_rules.c3
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/netfilter/Kconfig16
-rw-r--r--net/ipv6/tcp_ipv6.c24
-rw-r--r--net/kcm/kcmsock.c6
-rw-r--r--net/ncsi/internal.h7
-rw-r--r--net/ncsi/ncsi-manage.c98
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipvs/Kconfig6
-rw-r--r--net/netfilter/nf_tables_api.c25
-rw-r--r--net/netfilter/nf_tables_offload.c2
-rw-r--r--net/netfilter/nft_flow_offload.c19
-rw-r--r--net/netfilter/nft_lookup.c3
-rw-r--r--net/nfc/llcp_sock.c7
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/qrtr/qrtr.c1
-rw-r--r--net/rds/Kconfig4
-rw-r--r--net/rds/bind.c5
-rw-r--r--net/sched/Kconfig145
-rw-r--r--net/sched/act_api.c34
-rw-r--r--net/sched/act_sample.c1
-rw-r--r--net/sched/cls_api.c6
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cbs.c30
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_multiq.c23
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_sfb.c7
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/xdp/xdp_umem.c2
-rw-r--r--tools/lib/bpf/btf_dump.c94
-rw-r--r--tools/lib/bpf/xsk.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c21
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h5
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh7
-rw-r--r--tools/testing/selftests/membarrier/.gitignore3
-rw-r--r--tools/testing/selftests/membarrier/Makefile5
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h (renamed from tools/testing/selftests/membarrier/membarrier_test.c)40
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c73
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c24
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh6
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh14
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh21
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c734
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c179
-rw-r--r--usr/include/Makefile1
223 files changed, 3308 insertions, 1786 deletions
diff --git a/Documentation/devicetree/bindings/net/adi,adin.yaml b/Documentation/devicetree/bindings/net/adi,adin.yaml
index 69375cb28e92..d95cc691a65f 100644
--- a/Documentation/devicetree/bindings/net/adi,adin.yaml
+++ b/Documentation/devicetree/bindings/net/adi,adin.yaml
@@ -36,12 +36,6 @@ properties:
enum: [ 4, 8, 12, 16, 20, 24 ]
default: 8
- adi,disable-energy-detect:
- description: |
- Disables Energy Detect Powerdown Mode (default disabled, i.e energy detect
- is enabled if this property is unspecified)
- type: boolean
-
examples:
- |
ethernet {
@@ -68,6 +62,5 @@ examples:
reg = <1>;
adi,fifo-depth-bits = <16>;
- adi,disable-energy-detect;
};
};
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
index 5100358177c9..b921731cd970 100644
--- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
@@ -12,8 +12,36 @@ and therefore may overwrite them.
KSZ9021:
All skew control options are specified in picoseconds. The minimum
- value is 0, the maximum value is 3000, and it is incremented by 200ps
- steps.
+ value is 0, the maximum value is 3000, and it can be specified in 200ps
+ steps, *but* these values are not in fact what you get because this chip's
+ skew values actually increase in 120ps steps, starting from -840ps. The
+ incorrect values came from an error in the original KSZ9021 datasheet
+ before it was corrected in revision 1.2 (Feb 2014), but it is too late to
+ change the driver now because of the many existing device trees that have
+ been created using values that go up in increments of 200.
+
+ The following table shows the actual skew delay you will get for each of the
+ possible devicetree values, and the number that will be programmed into the
+ corresponding pad skew register:
+
+ Device Tree Value Delay Pad Skew Register Value
+ -----------------------------------------------------
+ 0 -840ps 0000
+ 200 -720ps 0001
+ 400 -600ps 0010
+ 600 -480ps 0011
+ 800 -360ps 0100
+ 1000 -240ps 0101
+ 1200 -120ps 0110
+ 1400 0ps 0111
+ 1600 120ps 1000
+ 1800 240ps 1001
+ 2000 360ps 1010
+ 2200 480ps 1011
+ 2400 600ps 1100
+ 2600 720ps 1101
+ 2800 840ps 1110
+ 3000 960ps 1111
Optional properties:
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 7ad36213093e..5df4aa7f6811 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -18,6 +18,7 @@ Required properties:
R-Car Gen2 and RZ/G1 devices.
- "renesas,etheravb-r8a774a1" for the R8A774A1 SoC.
+ - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC.
- "renesas,etheravb-r8a774c0" for the R8A774C0 SoC.
- "renesas,etheravb-r8a7795" for the R8A7795 SoC.
- "renesas,etheravb-r8a7796" for the R8A7796 SoC.
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index ebe4537a7cce..4845e29411e4 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -113,7 +113,7 @@ properties:
const: stmmaceth
mac-mode:
- maxItems: 1
+ $ref: ethernet-controller.yaml#/properties/phy-connection-type
description:
The property is identical to 'phy-mode', and assumes that there is mode
converter in-between the MAC & PHY (e.g. GMII-to-RGMII). This converter
diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink-trap.rst
index c20c7c483664..8e90a85f3bd5 100644
--- a/Documentation/networking/devlink-trap.rst
+++ b/Documentation/networking/devlink-trap.rst
@@ -143,7 +143,8 @@ be added to the following table:
* - ``port_list_is_empty``
- ``drop``
- Traps packets that the device decided to drop in case they need to be
- flooded and the flood list is empty
+ flooded (e.g., unknown unicast, unregistered multicast) and there are
+ no ports the packets should be flooded to
* - ``port_loopback_filter``
- ``drop``
- Traps packets that the device decided to drop in case after layer 2
diff --git a/MAINTAINERS b/MAINTAINERS
index 857611c746c4..296de2b51c83 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -643,6 +643,7 @@ F: drivers/net/ethernet/alacritech/*
FORCEDETH GIGABIT ETHERNET DRIVER
M: Rain River <rain.1986.08.12@gmail.com>
+M: Zhu Yanjun <yanjun.zhu@oracle.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/nvidia/*
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a1ebcbc3931f..cf00ff0d121d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -209,8 +209,9 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
#ifndef __ASSEMBLY__
@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
- CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
+ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 8e8514efb124..ee62776e5433 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
return xirr;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * 105: smp_muxed_ipi_set_message():
+ * 105: smp_mb()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * --105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 42: // returns to executing guest
+ * | // RE-ORDERED STORE COMPLETES
+ * ->105: message[CALL_FUNCTION] = 1
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * -- 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 105: smp_muxed_ipi_set_message():
+ * | 105: smp_mb()
+ * | 105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | // RE-ORDERED STORE COMPLETES
+ * -> 42: kvmppc_clear_host_ipi(42)
+ * 42: // returns to executing guest
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
{
- paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
+ /*
+ * order stores of IPI messages vs. setting of host_ipi flag
+ *
+ * pairs with the barrier in kvmppc_clear_host_ipi()
+ */
+ smp_mb();
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
+ /*
+ * order clearing of host_ipi flag vs. processing of IPI messages
+ *
+ * pairs with the barrier in kvmppc_set_host_ipi()
+ */
+ smp_mb();
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -486,7 +577,10 @@ static inline u32 kvmppc_get_xics_latch(void)
return 0;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index ec3714cf0989..b3cbb1136bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -475,9 +475,10 @@
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
-#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
-#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
-#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */
+#define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */
+#define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
/*
* These bits are used in the function kvmppc_set_arch_compat() to specify and
* determine both the compatibility level which we want to emulate and the
@@ -486,6 +487,8 @@
#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
+#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
+#define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..180b3a5d1001 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
return true;
}
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+ if (PVR_VER(pvr) == PVR_POWER9) {
+ /*
+ * Set the tlbie feature flag for anything below
+ * Nimbus DD 2.3 and Cumulus DD 1.3
+ */
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+}
+
static __init void cpufeatures_cpu_quirks(void)
{
- int version = mfspr(SPRN_PVR);
+ unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
+ update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 0a91dee51245..bc8a551013be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1960,7 +1960,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
pci_err(pdev, "Going to break: %pR\n", bar);
if (pdev->is_virtfn) {
-#ifndef CONFIG_IOV
+#ifndef CONFIG_PCI_IOV
return -ENXIO;
#else
/*
@@ -1980,7 +1980,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
pos += PCI_SRIOV_CTRL;
bit = PCI_SRIOV_CTRL_MSE;
-#endif /* !CONFIG_IOV */
+#endif /* !CONFIG_PCI_IOV */
} else {
bit = PCI_COMMAND_MEMORY;
pos = PCI_COMMAND;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index efd8f93bc9dc..709cf1fd4cf4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -401,8 +401,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
- /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
- vc->pcr = host_pcr_bit - guest_pcr_bit;
+ /*
+ * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
+ * Also set all reserved PCR bits
+ */
+ vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
spin_unlock(&vc->lock);
return 0;
@@ -3410,7 +3413,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
}
if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr);
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
@@ -3490,7 +3493,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index fff90f2c3de2..cdf30c6eaf54 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- hr->pcr = vc->pcr;
+ hr->pcr = vc->pcr | PCR_MASK;
hr->dpdes = vc->dpdes;
hr->hfscr = vcpu->arch.hfscr;
hr->tb_offset = vc->tb_offset;
@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->lpid = swab32(hr->lpid);
hr->vcpu_token = swab32(hr->vcpu_token);
hr->lpcr = swab64(hr->lpcr);
- hr->pcr = swab64(hr->pcr);
+ hr->pcr = swab64(hr->pcr) | PCR_MASK;
hr->amor = swab64(hr->amor);
hr->dpdes = swab64(hr->dpdes);
hr->hfscr = swab64(hr->hfscr);
@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- vc->pcr = hr->pcr;
+ vc->pcr = hr->pcr | PCR_MASK;
vc->dpdes = hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
vcpu->arch.dawr = hr->dawr0;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7186c65c61c9..220305454c23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLUSH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+ "r" (rb_value), "r" (lpid));
+ }
+}
+
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
}
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
- /*
- * Need the extra ptesync to make sure we don't
- * re-order the tlbie
- */
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
- "r" (rbvalues[0]), "r" (kvm->arch.lpid));
- }
-
+ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} else {
if (need_sync)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 4d2ec77d806c..287d5911df0f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
- kvmppc_set_host_ipi(hcpu, 1);
+ kvmppc_set_host_ipi(hcpu);
smp_mb();
kvmhv_rm_send_ipi(hcpu);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9a05b0d932ef..74a9cfe84aee 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -644,8 +644,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Load guest PCR value to select appropriate compat mode */
37: ld r7, VCORE_PCR(r5)
- cmpdi r7, 0
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r7, r6
beq 38f
+ or r7, r7, r6
mtspr SPRN_PCR, r7
38:
@@ -1913,10 +1915,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Reset PCR */
ld r0, VCORE_PCR(r5)
- cmpdi r0, 0
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r0, r6
beq 18f
- li r0, 0
- mtspr SPRN_PCR, r0
+ mtspr SPRN_PCR, r6
18:
/* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 90ab4f31e2b3..523e42eb11da 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -197,9 +197,32 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
return va;
}
-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+ int apsize, int ssize)
{
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLUSH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Need the extra ptesync to ensure we don't reorder tlbie*/
asm volatile("ptesync": : :"memory");
___tlbie(vpn, psize, apsize, ssize);
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
} else {
__tlbie(vpn, psize, apsize, ssize);
- fixup_tlbie(vpn, psize, apsize, ssize);
+ fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
/*
* Just do one more with the last used values.
*/
- fixup_tlbie(vpn, psize, psize, ssize);
+ fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 2d0cb5ba9a47..0ba30b8b935b 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
+ /*
+ * For tasks which were successfully initialized we end up calling
+ * arch_exit_mmap() which clears the process table entry. And
+ * arch_exit_mmap() is called before the required fullmm TLB flush
+ * which does a RIC=2 flush. Hence for an initialized task, we do clear
+ * any cached process table entries.
+ *
+ * The condition below handles the error case during task init. We have
+ * set the process table entry early and if we fail a task
+ * initialization, we need to ensure the process table entry is zeroed.
+ * We need not worry about process table entry caches because the task
+ * never ran with the PID value.
+ */
if (radix_enabled())
- WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+ process_tb[mm->context.id].prtb0 = 0;
else
subpage_prot_free(mm);
destroy_contexts(&mm->context);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 631be42abd33..67af871190c6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -196,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
{
- unsigned long pid = 0;
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid(pid);
}
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_lpid(lpid);
}
- fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range(addr - page_size, pid, ap);
}
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
- fixup_tlbie();
+ fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
- fixup_tlbie_lpid(lpid);
+ fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -856,7 +920,7 @@ is_local:
if (gflush)
__tlbie_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
- fixup_tlbie();
+
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 802387b231ad..0e6ed4413eea 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -12,6 +12,14 @@
#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
+static pgprot_t kasan_prot_ro(void)
+{
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return PAGE_READONLY;
+
+ return PAGE_KERNEL_RO;
+}
+
static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
{
unsigned long va = (unsigned long)kasan_early_shadow_page;
@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
{
pmd_t *pmd;
unsigned long k_cur, k_next;
+ pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
if (!new)
return -ENOMEM;
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- kasan_populate_pte(new, PAGE_READONLY);
- else
- kasan_populate_pte(new, PAGE_KERNEL_RO);
+ kasan_populate_pte(new, prot);
smp_wmb(); /* See comment in __pte_alloc */
@@ -103,11 +109,23 @@ static int __ref kasan_init_region(void *start, size_t size)
static void __init kasan_remap_early_shadow_ro(void)
{
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
- else
- kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+ pgprot_t prot = kasan_prot_ro();
+ unsigned long k_start = KASAN_SHADOW_START;
+ unsigned long k_end = KASAN_SHADOW_END;
+ unsigned long k_cur;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+ if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
+ continue;
+ __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+ }
flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 94cd96b9b7bb..fbd6e6b7bbf2 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -193,7 +193,7 @@ static void pnv_smp_cpu_kill_self(void)
* for coming online, which are handled via
* generic_check_cpu_restart() calls.
*/
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
srr1 = pnv_cpu_offline(cpu);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 36b846f6e74e..b53359258d99 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+/*
+ * H_BLOCK_REMOVE supported block size for this page size in a segment whose
+ * base page size is that page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
+/*
+ * Due to the complexity involved, and because the current hypervisor only
+ * returns this value or 0, we limit H_BLOCK_REMOVE support to a block size
+ * of 8.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static u8 dtl_mask = DTL_LOG_PREEMPT;
#else
@@ -984,6 +1000,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+/*
+ * Returns true if we support this block size for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, we only support a block size of 8.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+ return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
/**
* H_BLOCK_REMOVE caller.
* @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1170,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ /* Assuming THP size is 16M */
+ if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
hugepage_block_invalidate(slot, vpn, count, psize, ssize);
else
hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1312,6 +1340,137 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
}
/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each (segment base page size, actual page size) pair.
+ *
+ * The ibm,get-system-parameter RTAS call returns a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ * TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ * MSB=0 means 4k segment base page size and actual page size
+ * MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+ unsigned int block_size)
+{
+ if (block_size > hblkrm_size[bpsize][psize])
+ hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ * - bit 7 is the L bit
+ * - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK 0x80
+#define HBLKRM_PENC_MASK 0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+ unsigned int block_size)
+{
+ unsigned int bpsize, psize;
+
+ /* First, check the L bit, if not set, this means 4K */
+ if ((lp & HBLKRM_L_MASK) == 0) {
+ set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+ return;
+ }
+
+ lp &= HBLKRM_PENC_MASK;
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+ struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ if (def->penc[psize] == lp) {
+ set_hblkrm_bloc_size(bpsize, psize, block_size);
+ return;
+ }
+ }
+ }
+}
+
+#define SPLPAR_TLB_BIC_TOKEN 50
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH 128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+ unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+ int call_status, len, idx, bpsize;
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_TLB_BIC_TOKEN,
+ __pa(rtas_data_buf),
+ RTAS_DATA_BUF_SIZE);
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+ local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __func__, call_status);
+ return;
+ }
+
+ /*
+ * The first two (2) bytes of the data in the buffer are the length of
+ * the returned data, not counting these first two (2) bytes.
+ */
+ len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
+ if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+ pr_warn("%s too large returned buffer %d", __func__, len);
+ return;
+ }
+
+ idx = 2;
+ while (idx < len) {
+ u8 block_shift = local_buffer[idx++];
+ u32 block_size;
+ unsigned int npsize;
+
+ if (!block_shift)
+ break;
+
+ block_size = 1 << block_shift;
+
+ for (npsize = local_buffer[idx++];
+ npsize > 0 && idx < len; npsize--)
+ check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
+ block_size);
+ }
+
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+ for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+ if (hblkrm_size[bpsize][idx])
+ pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+ bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
*/
@@ -1330,7 +1489,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+ if (is_supported_hlbkrm(batch->psize, batch->psize)) {
do_block_remove(number, batch, param);
goto out;
}
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..61883291defc 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -65,29 +65,21 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
cond_resched();
} while (rc == H_BUSY);
- if (rc) {
- /* H_OVERLAP needs a separate error path */
- if (rc == H_OVERLAP)
- return -EBUSY;
-
- dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
- return -ENXIO;
- }
+ if (rc)
+ return rc;
p->bound_addr = saved;
-
- dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
-
- return 0;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+ return rc;
}
-static int drc_pmem_unbind(struct papr_scm_priv *p)
+static void drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
uint64_t token = 0;
int64_t rc;
- dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
/* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
@@ -110,12 +102,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
else
- dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
p->drc_index);
- return rc == H_SUCCESS ? 0 : -ENXIO;
+ return;
}
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+ unsigned long start_addr;
+ unsigned long end_addr;
+ unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ int64_t rc;
+
+
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, 0);
+ if (rc)
+ goto err_out;
+ start_addr = ret[0];
+
+ /* Make sure the full region is bound. */
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, p->blocks - 1);
+ if (rc)
+ goto err_out;
+ end_addr = ret[0];
+
+ if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
+ goto err_out;
+
+ p->bound_addr = start_addr;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+ return rc;
+
+err_out:
+ dev_info(&p->pdev->dev,
+ "Failed to query, trying an unbind followed by bind");
+ drc_pmem_unbind(p);
+ return drc_pmem_bind(p);
+}
+
+
static int papr_scm_meta_get(struct papr_scm_priv *p,
struct nd_cmd_get_config_data_hdr *hdr)
{
@@ -436,14 +464,14 @@ static int papr_scm_probe(struct platform_device *pdev)
rc = drc_pmem_bind(p);
/* If phyp says drc memory still bound then force unbound and retry */
- if (rc == -EBUSY) {
- dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
- drc_pmem_unbind(p);
- rc = drc_pmem_bind(p);
- }
+ if (rc == H_OVERLAP)
+ rc = drc_pmem_query_n_bind(p);
- if (rc)
+ if (rc != H_SUCCESS) {
+ dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+ rc = -ENXIO;
goto err;
+ }
/* setup the resource for the newly bound range */
p->res.start = p->bound_addr;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index a6624d4bd9d0..13fa370a87e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
void pseries_setup_rfi_flush(void);
+void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f8adcd0e4589..0a40201f315f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -744,6 +744,7 @@ static void __init pSeries_setup_arch(void)
pseries_setup_rfi_flush();
setup_stf_barrier();
+ pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 485569ff7ef1..7d13d2ef5a90 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu)
{
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 8bb8dd7dd6ad..68fd2540b093 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
{
int hw_cpu = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
}
@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
return smp_ipi_demux();
@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, "
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 527749066d31..fb4ee5444379 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -25,6 +25,7 @@ KCOV_INSTRUMENT := n
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN)
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 70b00ae4ec38..8af793f5e811 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1690,7 +1690,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
if (RBRQ_HBUF_ERR(he_dev->rbrq_head)) {
hprintk("HBUF_ERR! (cid 0x%x)\n", cid);
- atomic_inc(&vcc->stats->rx_drop);
+ atomic_inc(&vcc->stats->rx_drop);
goto return_host_buffers;
}
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1dd467bed8fc..6d7ec371e7b2 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
if (family == AF_INET) {
rt = container_of(dst, struct rtable, dst);
- return rt->rt_gw_family == AF_INET;
+ return rt->rt_uses_gateway;
}
rt6 = container_of(dst, struct rt6_info, dst);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 97975bb7f347..2369b8af81f3 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -70,7 +70,6 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
if (!dev_data)
return NULL;
+ spin_lock_init(&dev_data->lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev)
*/
}
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
+
/****************************************************************************
*
* Interrupt handling functions
@@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain,
dma_addr_t iova, size_t size)
{
if (unlikely(amd_iommu_np_cache)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
domain_flush_complete(domain);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
}
@@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain)
BUG_ON(domain->mode < PAGE_MODE_NONE ||
domain->mode > PAGE_MODE_6_LEVEL);
- free_sub_pt(root, domain->mode, freelist);
+ freelist = free_sub_pt(root, domain->mode, freelist);
free_page_list(freelist);
}
@@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain)
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
-static void increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct protection_domain *domain,
gfp_t gfp)
{
unsigned long flags;
+ bool ret = false;
u64 *pte;
spin_lock_irqsave(&domain->lock, flags);
@@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain,
iommu_virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
- domain->updated = true;
+
+ ret = true;
out:
spin_unlock_irqrestore(&domain->lock, flags);
- return;
+ return ret;
}
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
unsigned long page_size,
u64 **pte_page,
- gfp_t gfp)
+ gfp_t gfp,
+ bool *updated)
{
int level, end_lvl;
u64 *pte, *page;
@@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode))
- increase_address_space(domain, gfp);
+ *updated = increase_address_space(domain, gfp) || *updated;
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain,
__pte = *pte;
pte_level = PM_PTE_LEVEL(__pte);
- if (!IOMMU_PTE_PRESENT(__pte) ||
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
page = (u64 *)get_zeroed_page(gfp);
+
if (!page)
return NULL;
@@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
/* pte could have been changed somewhere. */
if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page);
- else if (pte_level == PAGE_MODE_7_LEVEL)
- domain->updated = true;
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
continue;
}
@@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain,
*page_size = PTE_LEVEL_PAGE_SIZE(level);
}
- if (PM_PTE_LEVEL(*pte) == 0x07) {
- unsigned long pte_mask;
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- *page_size = pte_mask = PTE_PAGE_SIZE(*pte);
- pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
- pte = (u64 *)(((unsigned long)pte) & pte_mask);
- }
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
return pte;
}
@@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom,
gfp_t gfp)
{
struct page *freelist = NULL;
+ bool updated = false;
u64 __pte, *pte;
- int i, count;
+ int ret, i, count;
BUG_ON(!IS_ALIGNED(bus_addr, page_size));
BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+ ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
- return -EINVAL;
+ goto out;
count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+ pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+ ret = -ENOMEM;
if (!pte)
- return -ENOMEM;
+ goto out;
for (i = 0; i < count; ++i)
freelist = free_clear_pte(&pte[i], pte[i], freelist);
if (freelist != NULL)
- dom->updated = true;
+ updated = true;
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom,
for (i = 0; i < count; ++i)
pte[i] = __pte;
- update_domain(dom);
+ ret = 0;
+
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ update_domain(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
/* Everything flushed out, free pages now */
free_page_list(freelist);
- return 0;
+ return ret;
}
static unsigned long iommu_unmap_page(struct protection_domain *dom,
@@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain)
static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->domain.lock, flags);
domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain);
+ spin_unlock_irqrestore(&dom->domain.lock, flags);
}
static void iova_domain_flush_tlb(struct iova_domain *iovad)
@@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1;
}
-/*
- * If a device is not yet associated with a domain, this function makes the
- * device visible in the domain
- */
-static int __attach_device(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
-{
- int ret;
-
- /* lock domain */
- spin_lock(&domain->lock);
-
- ret = -EBUSY;
- if (dev_data->domain != NULL)
- goto out_unlock;
-
- /* Attach alias group root */
- do_attach(dev_data, domain);
-
- ret = 0;
-
-out_unlock:
-
- /* ready */
- spin_unlock(&domain->lock);
-
- return ret;
-}
-
-
static void pdev_iommuv2_disable(struct pci_dev *pdev)
{
pci_disable_ats(pdev);
@@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev,
unsigned long flags;
int ret;
+ spin_lock_irqsave(&domain->lock, flags);
+
dev_data = get_dev_data(dev);
+ spin_lock(&dev_data->lock);
+
+ ret = -EBUSY;
+ if (dev_data->domain != NULL)
+ goto out;
+
if (!dev_is_pci(dev))
goto skip_ats_check;
pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) {
+ ret = -EINVAL;
if (!dev_data->passthrough)
- return -EINVAL;
+ goto out;
if (dev_data->iommu_v2) {
if (pdev_iommuv2_enable(pdev) != 0)
- return -EINVAL;
+ goto out;
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
@@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev,
}
skip_ats_check:
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- ret = __attach_device(dev_data, domain);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ ret = 0;
+
+ do_attach(dev_data, domain);
/*
* We might boot into a crash-kernel here. The crashed kernel
@@ -2169,23 +2212,14 @@ skip_ats_check:
*/
domain_flush_tlb_pde(domain);
- return ret;
-}
-
-/*
- * Removes a device from a protection domain (unlocked)
- */
-static void __detach_device(struct iommu_dev_data *dev_data)
-{
- struct protection_domain *domain;
-
- domain = dev_data->domain;
+ domain_flush_complete(domain);
- spin_lock(&domain->lock);
+out:
+ spin_unlock(&dev_data->lock);
- do_detach(dev_data);
+ spin_unlock_irqrestore(&domain->lock, flags);
- spin_unlock(&domain->lock);
+ return ret;
}
/*
@@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev)
dev_data = get_dev_data(dev);
domain = dev_data->domain;
+ spin_lock_irqsave(&domain->lock, flags);
+
+ spin_lock(&dev_data->lock);
+
/*
* First check if the device is still attached. It might already
* be detached from its domain because the generic
@@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev)
* our alias handling.
*/
if (WARN_ON(!dev_data->domain))
- return;
+ goto out;
- /* lock device table */
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __detach_device(dev_data);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ do_detach(dev_data);
if (!dev_is_pci(dev))
- return;
+ goto out;
if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
pdev_iommuv2_disable(to_pci_dev(dev));
@@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev)
pci_disable_ats(to_pci_dev(dev));
dev_data->ats.enabled = false;
+
+out:
+ spin_unlock(&dev_data->lock);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static int amd_iommu_add_device(struct device *dev)
@@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain)
static void update_domain(struct protection_domain *domain)
{
- if (!domain->updated)
- return;
-
update_device_table(domain);
domain_flush_devices(domain);
domain_flush_tlb_pde(domain);
-
- domain->updated = false;
}
static int dir2prot(enum dma_data_direction direction)
@@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev,
{
dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start, ret;
+ unsigned long flags;
unsigned int pages;
int prot = 0;
int i;
@@ -2429,8 +2465,10 @@ out_unmap:
iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
}
+ spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
+ spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, address, pages);
@@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
}
if (amd_iommu_unmap_flush) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
+ spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, dma_addr, pages);
} else {
pages = __roundup_pow_of_two(pages);
@@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain)
struct iommu_dev_data *entry;
unsigned long flags;
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&domain->lock, flags);
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
struct iommu_dev_data, list);
BUG_ON(!entry->domain);
- __detach_device(entry);
+ do_detach(entry);
}
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -3226,9 +3268,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
+ spin_lock_irqsave(&dom->lock, flags);
domain_flush_tlb_pde(dom);
domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -3290,7 +3335,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
/* Update data structure */
domain->mode = PAGE_MODE_NONE;
- domain->updated = true;
/* Make changes visible to IOMMUs */
update_domain(domain);
@@ -3336,7 +3380,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- domain->updated = true;
update_domain(domain);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 9ac229e92b07..c9c1612d52e0 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -475,7 +475,6 @@ struct protection_domain {
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
- bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
@@ -634,6 +633,9 @@ struct devid_map {
* This struct contains device specific data for the IOMMU
*/
struct iommu_dev_data {
+ /* Protect against attach/detach races */
+ spinlock_t lock;
+
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index c6ba37df4b9d..dff4132b3702 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -754,6 +754,8 @@ base_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
if (!sk)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 48e209e55843..df1c7989e13d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -487,7 +487,7 @@ config FUJITSU_ES
depends on ACPI
help
This driver provides support for Extended Socket network device
- on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series.
+ on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series.
config THUNDERBOLT_NET
tristate "Networking over Thunderbolt cable"
diff --git a/drivers/net/arcnet/Kconfig b/drivers/net/arcnet/Kconfig
index faeb4419b205..27551bf3d7e4 100644
--- a/drivers/net/arcnet/Kconfig
+++ b/drivers/net/arcnet/Kconfig
@@ -56,19 +56,19 @@ config ARCNET_CAP
tristate "Enable CAP mode packet interface"
help
ARCnet "cap mode" packet encapsulation. Used to get the hardware
- acknowledge back to userspace. After the initial protocol byte every
- packet is stuffed with an extra 4 byte "cookie" which doesn't
- actually appear on the network. After transmit the driver will send
- back a packet with protocol byte 0 containing the status of the
- transmission:
- 0=no hardware acknowledge
- 1=excessive nak
- 2=transmission accepted by the receiver hardware
-
- Received packets are also stuffed with the extra 4 bytes but it will
- be random data.
-
- Cap only listens to protocol 1-8.
+ acknowledge back to userspace. After the initial protocol byte every
+ packet is stuffed with an extra 4 byte "cookie" which doesn't
+ actually appear on the network. After transmit the driver will send
+ back a packet with protocol byte 0 containing the status of the
+ transmission:
+ 0=no hardware acknowledge
+ 1=excessive nak
+ 2=transmission accepted by the receiver hardware
+
+ Received packets are also stuffed with the extra 4 bytes but it will
+ be random data.
+
+ Cap only listens to protocol 1-8.
config ARCNET_COM90xx
tristate "ARCnet COM90xx (normal) chipset driver"
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 8459115d9d4e..553776cc1d29 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -1063,31 +1063,34 @@ EXPORT_SYMBOL(arcnet_interrupt);
static void arcnet_rx(struct net_device *dev, int bufnum)
{
struct arcnet_local *lp = netdev_priv(dev);
- struct archdr pkt;
+ union {
+ struct archdr pkt;
+ char buf[512];
+ } rxdata;
struct arc_rfc1201 *soft;
int length, ofs;
- soft = &pkt.soft.rfc1201;
+ soft = &rxdata.pkt.soft.rfc1201;
- lp->hw.copy_from_card(dev, bufnum, 0, &pkt, ARC_HDR_SIZE);
- if (pkt.hard.offset[0]) {
- ofs = pkt.hard.offset[0];
+ lp->hw.copy_from_card(dev, bufnum, 0, &rxdata.pkt, ARC_HDR_SIZE);
+ if (rxdata.pkt.hard.offset[0]) {
+ ofs = rxdata.pkt.hard.offset[0];
length = 256 - ofs;
} else {
- ofs = pkt.hard.offset[1];
+ ofs = rxdata.pkt.hard.offset[1];
length = 512 - ofs;
}
/* get the full header, if possible */
- if (sizeof(pkt.soft) <= length) {
- lp->hw.copy_from_card(dev, bufnum, ofs, soft, sizeof(pkt.soft));
+ if (sizeof(rxdata.pkt.soft) <= length) {
+ lp->hw.copy_from_card(dev, bufnum, ofs, soft, sizeof(rxdata.pkt.soft));
} else {
- memset(&pkt.soft, 0, sizeof(pkt.soft));
+ memset(&rxdata.pkt.soft, 0, sizeof(rxdata.pkt.soft));
lp->hw.copy_from_card(dev, bufnum, ofs, soft, length);
}
arc_printk(D_DURING, dev, "Buffer #%d: received packet from %02Xh to %02Xh (%d+4 bytes)\n",
- bufnum, pkt.hard.source, pkt.hard.dest, length);
+ bufnum, rxdata.pkt.hard.source, rxdata.pkt.hard.dest, length);
dev->stats.rx_packets++;
dev->stats.rx_bytes += length + ARC_HDR_SIZE;
@@ -1096,13 +1099,13 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
if (arc_proto_map[soft->proto]->is_ip) {
if (BUGLVL(D_PROTO)) {
struct ArcProto
- *oldp = arc_proto_map[lp->default_proto[pkt.hard.source]],
+ *oldp = arc_proto_map[lp->default_proto[rxdata.pkt.hard.source]],
*newp = arc_proto_map[soft->proto];
if (oldp != newp) {
arc_printk(D_PROTO, dev,
"got protocol %02Xh; encap for host %02Xh is now '%c' (was '%c')\n",
- soft->proto, pkt.hard.source,
+ soft->proto, rxdata.pkt.hard.source,
newp->suffix, oldp->suffix);
}
}
@@ -1111,10 +1114,10 @@ static void arcnet_rx(struct net_device *dev, int bufnum)
lp->default_proto[0] = soft->proto;
/* in striking contrast, the following isn't a hack. */
- lp->default_proto[pkt.hard.source] = soft->proto;
+ lp->default_proto[rxdata.pkt.hard.source] = soft->proto;
}
/* call the protocol-specific receiver. */
- arc_proto_map[soft->proto]->rx(dev, bufnum, &pkt, length);
+ arc_proto_map[soft->proto]->rx(dev, bufnum, &rxdata.pkt, length);
}
static void null_rx(struct net_device *dev, int bufnum,
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 4b3d0ddcda79..b412f7ba4f89 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -15,10 +15,10 @@ config CAN_EMS_USB
from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de).
config CAN_ESD_USB2
- tristate "ESD USB/2 CAN/USB interface"
- ---help---
- This driver supports the CAN-USB/2 interface
- from esd electronic system design gmbh (http://www.esd.eu).
+ tristate "ESD USB/2 CAN/USB interface"
+ ---help---
+ This driver supports the CAN-USB/2 interface
+ from esd electronic system design gmbh (http://www.esd.eu).
config CAN_GS_USB
tristate "Geschwister Schneider UG interfaces"
diff --git a/drivers/net/dsa/b53/b53_serdes.h b/drivers/net/dsa/b53/b53_serdes.h
index 3bb4f91aec9e..55d280fe38e4 100644
--- a/drivers/net/dsa/b53/b53_serdes.h
+++ b/drivers/net/dsa/b53/b53_serdes.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
- *
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
* Northstar Plus switch SerDes/SGMII PHY definitions
*
* Copyright (C) 2018 Florian Fainelli <f.fainelli@gmail.com>
diff --git a/drivers/net/dsa/lantiq_pce.h b/drivers/net/dsa/lantiq_pce.h
index 180663138e75..e2be31f3672a 100644
--- a/drivers/net/dsa/lantiq_pce.h
+++ b/drivers/net/dsa/lantiq_pce.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* PCE microcode extracted from UGW 7.1.1 switch api
*
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index a24d8e61fbe7..dd60d0837fc6 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -303,7 +303,7 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
{ \
.name = #width, \
.val_bits = (width), \
- .reg_stride = (width) / 8, \
+ .reg_stride = 1, \
.reg_bits = (regbits) + (regalign), \
.pad_bits = (regpad), \
.max_register = BIT(regbits) - 1, \
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 16f15c93a102..684aa51684db 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -936,6 +936,9 @@ qca8k_port_enable(struct dsa_switch *ds, int port,
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
qca8k_port_set_status(priv, port, 1);
priv->port_sts[port].enabled = 1;
diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index 55424f39cb0d..f40b248f0b23 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -27,6 +27,7 @@ config NET_DSA_SJA1105_PTP
config NET_DSA_SJA1105_TAS
bool "Support for the Time-Aware Scheduler on NXP SJA1105"
depends on NET_DSA_SJA1105
+ depends on NET_SCH_TAPRIO
help
This enables support for the TTEthernet-based egress scheduling
engine in the SJA1105 DSA driver, which is controlled using a
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1e2de9d062bf..e8e9c166185d 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -140,17 +140,6 @@ source "drivers/net/ethernet/neterion/Kconfig"
source "drivers/net/ethernet/netronome/Kconfig"
source "drivers/net/ethernet/ni/Kconfig"
source "drivers/net/ethernet/8390/Kconfig"
-
-config NET_NETX
- tristate "NetX Ethernet support"
- select MII
- depends on ARCH_NETX
- ---help---
- This is support for the Hilscher netX builtin Ethernet ports
-
- To compile this driver as a module, choose M here. The module
- will be called netx-eth.
-
source "drivers/net/ethernet/nvidia/Kconfig"
source "drivers/net/ethernet/nxp/Kconfig"
source "drivers/net/ethernet/oki-semi/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 77f9838a76c9..05abebc17804 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -64,7 +64,6 @@ obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
obj-$(CONFIG_NET_VENDOR_NI) += ni/
-obj-$(CONFIG_NET_NETX) += netx-eth.o
obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
obj-$(CONFIG_LPC_ENET) += nxp/
obj-$(CONFIG_NET_VENDOR_OKI) += oki-semi/
diff --git a/drivers/net/ethernet/allwinner/Kconfig b/drivers/net/ethernet/allwinner/Kconfig
index a5e2bcbf2722..264a482ec31d 100644
--- a/drivers/net/ethernet/allwinner/Kconfig
+++ b/drivers/net/ethernet/allwinner/Kconfig
@@ -21,17 +21,17 @@ config NET_VENDOR_ALLWINNER
if NET_VENDOR_ALLWINNER
config SUN4I_EMAC
- tristate "Allwinner A10 EMAC support"
+ tristate "Allwinner A10 EMAC support"
depends on ARCH_SUNXI
depends on OF
select CRC32
select MII
select PHYLIB
select MDIO_SUN4I
- ---help---
- Support for Allwinner A10 EMAC ethernet driver.
+ ---help---
+ Support for Allwinner A10 EMAC ethernet driver.
- To compile this driver as a module, choose M here. The module
- will be called sun4i-emac.
+ To compile this driver as a module, choose M here. The module
+ will be called sun4i-emac.
endif # NET_VENDOR_ALLWINNER
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
index 69ca99d8ac26..cca72a75f551 100644
--- a/drivers/net/ethernet/amazon/Kconfig
+++ b/drivers/net/ethernet/amazon/Kconfig
@@ -19,6 +19,7 @@ if NET_VENDOR_AMAZON
config ENA_ETHERNET
tristate "Elastic Network Adapter (ENA) support"
depends on PCI_MSI && !CPU_BIG_ENDIAN
+ select DIMLIB
---help---
This driver supports Elastic Network Adapter (ENA).
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 38046bf0ff44..2845ac277724 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -211,8 +211,8 @@ static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
pkt_ctrl->curr_bounce_buf =
ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
- memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
- 0x0, llq_info->desc_list_entry_size);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
pkt_ctrl->idx = 0;
if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index 28892b8acd0e..a95c263a45aa 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -306,15 +306,13 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private)
{
struct aq_vec_s *self = private;
u64 irq_mask = 0U;
- irqreturn_t err = 0;
+ int err;
- if (!self) {
- err = -EINVAL;
- goto err_exit;
- }
+ if (!self)
+ return IRQ_NONE;
err = self->aq_hw_ops->hw_irq_read(self->aq_hw, &irq_mask);
if (err < 0)
- goto err_exit;
+ return IRQ_NONE;
if (irq_mask) {
self->aq_hw_ops->hw_irq_disable(self->aq_hw,
@@ -322,11 +320,10 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private)
napi_schedule(&self->napi);
} else {
self->aq_hw_ops->hw_irq_enable(self->aq_hw, 1U);
- err = IRQ_NONE;
+ return IRQ_NONE;
}
-err_exit:
- return err >= 0 ? IRQ_HANDLED : IRQ_NONE;
+ return IRQ_HANDLED;
}
cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self)
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 7df887e4024c..a977a459bd20 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2481,7 +2481,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
priv->phy_interface = of_get_phy_mode(dn);
/* Default to GMII interface mode */
- if (priv->phy_interface < 0)
+ if ((int)priv->phy_interface < 0)
priv->phy_interface = PHY_INTERFACE_MODE_GMII;
/* In the case of a fixed PHY, the DT node associated
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 35b59b5edf0f..8e8d557901a9 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -165,9 +165,8 @@ static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
{
- if (bp->hw_dma_cap & HW_DMA_CAP_64B)
- return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
- return NULL;
+ return (struct macb_dma_desc_64 *)((void *)desc
+ + sizeof(struct macb_dma_desc));
}
#endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 71854a19cebe..38024877751c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5701,7 +5701,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
whoami = t4_read_reg(adapter, PL_WHOAMI_A);
pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
- if (chip < 0) {
+ if ((int)chip < 0) {
dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
err = chip;
goto out_free_adapter;
diff --git a/drivers/net/ethernet/emulex/benet/Kconfig b/drivers/net/ethernet/emulex/benet/Kconfig
index e8c7eb842dbe..17d300ea9955 100644
--- a/drivers/net/ethernet/emulex/benet/Kconfig
+++ b/drivers/net/ethernet/emulex/benet/Kconfig
@@ -48,5 +48,5 @@ config BE2NET_SKYHAWK
chipsets. (e.g. OneConnect OCe14xxx)
comment "WARNING: be2net is useless without any enabled chip"
- depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
+ depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
BE2NET_SKYHAWK=n && BE2NET
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 7d6513ff8507..b73421c3e25b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -785,7 +785,7 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
}
priv->if_mode = of_get_phy_mode(np);
- if (priv->if_mode < 0) {
+ if ((int)priv->if_mode < 0) {
dev_err(priv->dev, "missing phy type\n");
of_node_put(priv->phy_node);
if (of_phy_is_fixed_link(np))
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 24bf7f68375f..51ad86417cb1 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2067,7 +2067,7 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-void reset_gfar(struct net_device *ndev)
+static void reset_gfar(struct net_device *ndev)
{
struct gfar_private *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 95a6b0926170..c41b19c760f8 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1194,7 +1194,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
goto err_free_mdio;
priv->phy_mode = of_get_phy_mode(node);
- if (priv->phy_mode < 0) {
+ if ((int)priv->phy_mode < 0) {
netdev_err(ndev, "not find phy-mode\n");
ret = -EINVAL;
goto err_mdiobus;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2e5172f61564..2b073a3c0b84 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1207,7 +1207,7 @@ static void ibmvnic_cleanup(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
/* ensure that transmissions are stopped if called by do_reset */
- if (adapter->resetting)
+ if (test_bit(0, &adapter->resetting))
netif_tx_disable(netdev);
else
netif_tx_stop_all_queues(netdev);
@@ -1428,7 +1428,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
u8 proto = 0;
netdev_tx_t ret = NETDEV_TX_OK;
- if (adapter->resetting) {
+ if (test_bit(0, &adapter->resetting)) {
if (!netif_subqueue_stopped(netdev, skb))
netif_stop_subqueue(netdev, queue_num);
dev_kfree_skb_any(skb);
@@ -1724,6 +1724,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
}
/**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_rwi *rwi,
+ u32 reset_state)
+{
+ struct net_device *netdev = adapter->netdev;
+ int i, rc;
+
+ netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+ rwi->reset_reason);
+
+ netif_carrier_off(netdev);
+ adapter->reset_reason = rwi->reset_reason;
+
+ ibmvnic_cleanup(netdev);
+
+ if (reset_state == VNIC_OPEN) {
+ rc = __ibmvnic_close(netdev);
+ if (rc)
+ return rc;
+ }
+
+ release_resources(adapter);
+ release_sub_crqs(adapter, 1);
+ release_crq_queue(adapter);
+
+ adapter->state = VNIC_PROBED;
+
+ rc = init_crq_queue(adapter);
+
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Couldn't initialize crq. rc=%d\n", rc);
+ return rc;
+ }
+
+ rc = ibmvnic_reset_init(adapter);
+ if (rc)
+ return IBMVNIC_INIT_FAILED;
+
+ /* If the adapter was in PROBE state prior to the reset,
+ * exit here.
+ */
+ if (reset_state == VNIC_PROBED)
+ return 0;
+
+ rc = ibmvnic_login(netdev);
+ if (rc) {
+ adapter->state = reset_state;
+ return rc;
+ }
+
+ rc = init_resources(adapter);
+ if (rc)
+ return rc;
+
+ ibmvnic_disable_irqs(adapter);
+
+ adapter->state = VNIC_CLOSED;
+
+ if (reset_state == VNIC_CLOSED)
+ return 0;
+
+ rc = __ibmvnic_open(netdev);
+ if (rc)
+ return IBMVNIC_OPEN_FAILED;
+
+ /* refresh device's multicast list */
+ ibmvnic_set_multi(netdev);
+
+ /* kick napi */
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_schedule(&adapter->napi[i]);
+
+ return 0;
+}
+
+/**
* do_reset returns zero if we are able to keep processing reset events, or
* non-zero if we hit a fatal error and must halt.
*/
@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
rwi->reset_reason);
+ rtnl_lock();
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
- rc = __ibmvnic_close(netdev);
+ adapter->state = VNIC_CLOSING;
+
+ /* Release the RTNL lock before link state change and
+ * re-acquire after the link state change to allow
+ * linkwatch_event to grab the RTNL lock and run during
+ * a reset.
+ */
+ rtnl_unlock();
+ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+ rtnl_lock();
if (rc)
- return rc;
- }
+ goto out;
- if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
- adapter->wait_for_reset) {
- release_resources(adapter);
- release_sub_crqs(adapter, 1);
- release_crq_queue(adapter);
+ if (adapter->state != VNIC_CLOSING) {
+ rc = -1;
+ goto out;
+ }
+
+ adapter->state = VNIC_CLOSED;
}
if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
@@ -1769,9 +1860,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
*/
adapter->state = VNIC_PROBED;
- if (adapter->wait_for_reset) {
- rc = init_crq_queue(adapter);
- } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
rc = ibmvnic_reenable_crq_queue(adapter);
release_sub_crqs(adapter, 1);
} else {
@@ -1783,36 +1872,35 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (rc) {
netdev_err(adapter->netdev,
"Couldn't initialize crq. rc=%d\n", rc);
- return rc;
+ goto out;
}
rc = ibmvnic_reset_init(adapter);
- if (rc)
- return IBMVNIC_INIT_FAILED;
+ if (rc) {
+ rc = IBMVNIC_INIT_FAILED;
+ goto out;
+ }
/* If the adapter was in PROBE state prior to the reset,
* exit here.
*/
- if (reset_state == VNIC_PROBED)
- return 0;
+ if (reset_state == VNIC_PROBED) {
+ rc = 0;
+ goto out;
+ }
rc = ibmvnic_login(netdev);
if (rc) {
adapter->state = reset_state;
- return rc;
+ goto out;
}
- if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
- adapter->wait_for_reset) {
- rc = init_resources(adapter);
- if (rc)
- return rc;
- } else if (adapter->req_rx_queues != old_num_rx_queues ||
- adapter->req_tx_queues != old_num_tx_queues ||
- adapter->req_rx_add_entries_per_subcrq !=
- old_num_rx_slots ||
- adapter->req_tx_entries_per_subcrq !=
- old_num_tx_slots) {
+ if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues ||
+ adapter->req_rx_add_entries_per_subcrq !=
+ old_num_rx_slots ||
+ adapter->req_tx_entries_per_subcrq !=
+ old_num_tx_slots) {
release_rx_pools(adapter);
release_tx_pools(adapter);
release_napi(adapter);
@@ -1820,32 +1908,30 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = init_resources(adapter);
if (rc)
- return rc;
+ goto out;
} else {
rc = reset_tx_pools(adapter);
if (rc)
- return rc;
+ goto out;
rc = reset_rx_pools(adapter);
if (rc)
- return rc;
+ goto out;
}
ibmvnic_disable_irqs(adapter);
}
adapter->state = VNIC_CLOSED;
- if (reset_state == VNIC_CLOSED)
- return 0;
+ if (reset_state == VNIC_CLOSED) {
+ rc = 0;
+ goto out;
+ }
rc = __ibmvnic_open(netdev);
if (rc) {
- if (list_empty(&adapter->rwi_list))
- adapter->state = VNIC_CLOSED;
- else
- adapter->state = reset_state;
-
- return 0;
+ rc = IBMVNIC_OPEN_FAILED;
+ goto out;
}
/* refresh device's multicast list */
@@ -1855,11 +1941,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
for (i = 0; i < adapter->req_rx_queues; i++)
napi_schedule(&adapter->napi[i]);
- if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
- adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
+ if (adapter->reset_reason != VNIC_RESET_FAILOVER)
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
- return 0;
+ rc = 0;
+
+out:
+ rtnl_unlock();
+
+ return rc;
}
static int do_hard_reset(struct ibmvnic_adapter *adapter,
@@ -1919,14 +2009,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
return 0;
rc = __ibmvnic_open(netdev);
- if (rc) {
- if (list_empty(&adapter->rwi_list))
- adapter->state = VNIC_CLOSED;
- else
- adapter->state = reset_state;
-
- return 0;
- }
+ if (rc)
+ return IBMVNIC_OPEN_FAILED;
return 0;
}
@@ -1965,20 +2049,17 @@ static void __ibmvnic_reset(struct work_struct *work)
{
struct ibmvnic_rwi *rwi;
struct ibmvnic_adapter *adapter;
- bool we_lock_rtnl = false;
u32 reset_state;
int rc = 0;
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
- /* netif_set_real_num_xx_queues needs to take rtnl lock here
- * unless wait_for_reset is set, in which case the rtnl lock
- * has already been taken before initializing the reset
- */
- if (!adapter->wait_for_reset) {
- rtnl_lock();
- we_lock_rtnl = true;
+ if (test_and_set_bit_lock(0, &adapter->resetting)) {
+ schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+ return;
}
+
reset_state = adapter->state;
rwi = get_next_rwi(adapter);
@@ -1990,22 +2071,43 @@ static void __ibmvnic_reset(struct work_struct *work)
break;
}
- if (adapter->force_reset_recovery) {
- adapter->force_reset_recovery = false;
- rc = do_hard_reset(adapter, rwi, reset_state);
+ if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ /* CHANGE_PARAM requestor holds rtnl_lock */
+ rc = do_change_param_reset(adapter, rwi, reset_state);
+ } else if (adapter->force_reset_recovery) {
+ /* Transport event occurred during previous reset */
+ if (adapter->wait_for_reset) {
+ /* Previous was CHANGE_PARAM; caller locked */
+ adapter->force_reset_recovery = false;
+ rc = do_hard_reset(adapter, rwi, reset_state);
+ } else {
+ rtnl_lock();
+ adapter->force_reset_recovery = false;
+ rc = do_hard_reset(adapter, rwi, reset_state);
+ rtnl_unlock();
+ }
} else {
rc = do_reset(adapter, rwi, reset_state);
}
kfree(rwi);
- if (rc && rc != IBMVNIC_INIT_FAILED &&
+ if (rc == IBMVNIC_OPEN_FAILED) {
+ if (list_empty(&adapter->rwi_list))
+ adapter->state = VNIC_CLOSED;
+ else
+ adapter->state = reset_state;
+ rc = 0;
+ } else if (rc && rc != IBMVNIC_INIT_FAILED &&
!adapter->force_reset_recovery)
break;
rwi = get_next_rwi(adapter);
+
+ if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+ rwi->reset_reason == VNIC_RESET_MOBILITY))
+ adapter->force_reset_recovery = true;
}
if (adapter->wait_for_reset) {
- adapter->wait_for_reset = false;
adapter->reset_done_rc = rc;
complete(&adapter->reset_done);
}
@@ -2015,9 +2117,16 @@ static void __ibmvnic_reset(struct work_struct *work)
free_all_rwi(adapter);
}
- adapter->resetting = false;
- if (we_lock_rtnl)
- rtnl_unlock();
+ clear_bit_unlock(0, &adapter->resetting);
+}
+
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+ struct ibmvnic_adapter *adapter;
+
+ adapter = container_of(work, struct ibmvnic_adapter,
+ ibmvnic_delayed_reset.work);
+ __ibmvnic_reset(&adapter->ibmvnic_reset);
}
static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -2072,14 +2181,11 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
- adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
return 0;
err:
- if (adapter->wait_for_reset)
- adapter->wait_for_reset = false;
return -ret;
}
@@ -2119,7 +2225,7 @@ restart_poll:
u16 offset;
u8 flags = 0;
- if (unlikely(adapter->resetting &&
+ if (unlikely(test_bit(0, &adapter->resetting) &&
adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
napi_complete_done(napi, frames_processed);
@@ -2770,7 +2876,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
return 1;
}
- if (adapter->resetting &&
+ if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
u64 val = (0xff000000) | scrq->hw_irq;
@@ -3320,7 +3426,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
if (rc) {
if (rc == H_CLOSED) {
dev_warn(dev, "CRQ Queue closed\n");
- if (adapter->resetting)
+ if (test_bit(0, &adapter->resetting))
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
}
@@ -4312,13 +4418,14 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
{
struct net_device *netdev = adapter->netdev;
int rc;
+ __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed);
rc = crq->query_phys_parms_rsp.rc.code;
if (rc) {
netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc);
return rc;
}
- switch (cpu_to_be32(crq->query_phys_parms_rsp.speed)) {
+ switch (rspeed) {
case IBMVNIC_10MBPS:
adapter->speed = SPEED_10;
break;
@@ -4344,8 +4451,8 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
adapter->speed = SPEED_100000;
break;
default:
- netdev_warn(netdev, "Unknown speed 0x%08x\n",
- cpu_to_be32(crq->query_phys_parms_rsp.speed));
+ if (netif_carrier_ok(netdev))
+ netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed);
adapter->speed = SPEED_UNKNOWN;
}
if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX)
@@ -4395,7 +4502,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
adapter->crq.active = false;
- if (adapter->resetting)
+ if (test_bit(0, &adapter->resetting))
adapter->force_reset_recovery = true;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
dev_info(dev, "Migrated, re-enabling adapter\n");
@@ -4733,7 +4840,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
return -1;
}
- if (adapter->resetting && !adapter->wait_for_reset &&
+ if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
adapter->reset_reason != VNIC_RESET_MOBILITY) {
if (adapter->req_rx_queues != old_num_rx_queues ||
adapter->req_tx_queues != old_num_tx_queues) {
@@ -4845,10 +4952,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
spin_lock_init(&adapter->stats_lock);
INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+ INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+ __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
init_completion(&adapter->init_done);
- adapter->resetting = false;
+ clear_bit(0, &adapter->resetting);
do {
rc = init_crq_queue(adapter);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 70bd286f8932..ebc39248b334 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -20,6 +20,7 @@
#define IBMVNIC_INVALID_MAP -1
#define IBMVNIC_STATS_TIMEOUT 1
#define IBMVNIC_INIT_FAILED 2
+#define IBMVNIC_OPEN_FAILED 3
/* basic structures plus 100 2k buffers */
#define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305
@@ -38,6 +39,8 @@
#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
#define IBMVNIC_BUFFER_HLEN 500
+#define IBMVNIC_RESET_DELAY 100
+
static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
#define IBMVNIC_USE_SERVER_MAXES 0x1
"use-server-maxes"
@@ -1076,7 +1079,8 @@ struct ibmvnic_adapter {
spinlock_t rwi_lock;
struct list_head rwi_list;
struct work_struct ibmvnic_reset;
- bool resetting;
+ struct delayed_work ibmvnic_delayed_reset;
+ unsigned long resetting;
bool napi_enabled, from_passive_init;
bool failover_pending;
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 0a2ec387a482..095f6c71b4fa 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3108,7 +3108,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
skb_put(skb, len);
if (dev->features & NETIF_F_RXCSUM) {
- skb->csum = csum;
+ skb->csum = le16_to_cpu(csum);
skb->ip_summed = CHECKSUM_COMPLETE;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 0dba272a5b2f..a1f20b205299 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -20,15 +20,15 @@ config MLX5_ACCEL
bool
config MLX5_FPGA
- bool "Mellanox Technologies Innova support"
- depends on MLX5_CORE
+ bool "Mellanox Technologies Innova support"
+ depends on MLX5_CORE
select MLX5_ACCEL
- ---help---
- Build support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
+ ---help---
+ Build support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
config MLX5_CORE_EN
bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
@@ -58,14 +58,14 @@ config MLX5_EN_RXNFC
API.
config MLX5_MPFS
- bool "Mellanox Technologies MLX5 MPFS support"
- depends on MLX5_CORE_EN
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
default y
- ---help---
+ ---help---
Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
- support in ConnectX NIC. MPFs is required for when multi-PF configuration
- is enabled to allow passing user configured unicast MAC addresses to the
- requesting PF.
+ support in ConnectX NIC. MPFs is required for when multi-PF configuration
+ is enabled to allow passing user configured unicast MAC addresses to the
+ requesting PF.
config MLX5_ESWITCH
bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
@@ -73,10 +73,10 @@ config MLX5_ESWITCH
default y
---help---
Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
- E-Switch provides internal SRIOV packet steering and switching for the
- enabled VFs and PF in two available modes:
- Legacy SRIOV mode (L2 mac vlan steering based).
- Switchdev mode (eswitch offloads).
+ E-Switch provides internal SRIOV packet steering and switching for the
+ enabled VFs and PF in two available modes:
+ Legacy SRIOV mode (L2 mac vlan steering based).
+ Switchdev mode (eswitch offloads).
config MLX5_CORE_EN_DCB
bool "Data Center Bridging (DCB) Support"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index eed7101e8bb7..acd946f2ddbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -399,10 +399,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
struct mlx5_flow_table *ft,
struct ethtool_rx_flow_spec *fs)
{
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
struct mlx5_flow_destination *dst = NULL;
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec;
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
int err = 0;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index da7555fdb4d5..3e78a727f3e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1664,46 +1664,63 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return err;
}
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
- struct flow_match_ipv4_addrs match;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_match_control match;
+ u16 addr_type;
- flow_rule_match_enc_ipv4_addrs(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(match.mask->src));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(match.key->src));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(match.mask->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(match.key->dst));
-
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
- } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
- struct flow_match_ipv6_addrs match;
+ flow_rule_match_enc_control(rule, &match);
+ addr_type = match.key->addr_type;
- flow_rule_match_enc_ipv6_addrs(rule, &match);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ /* For tunnel addr_type used same key id`s as for non-tunnel */
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->src));
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(match.key->dst));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ETH_P_IP);
+ } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ETH_P_IPV6);
+ }
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 9648c2297803..e47dd7c1b909 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1568,6 +1568,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
{ 0, }
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 7d81a7735de5..b74b7d0f6590 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -615,7 +615,7 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
* that recalculates the CS and forwards to the vport.
*/
ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
- dest_action->vport.num,
+ dest_action->vport.caps->num,
final_icm_addr);
if (ret) {
mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
@@ -744,7 +744,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
dest_action = action;
if (rx_rule) {
/* Loopback on WIRE vport is not supported */
- if (action->vport.num == WIRE_PORT)
+ if (action->vport.caps->num == WIRE_PORT)
goto out_invalid_arg;
attr.final_icm_addr = action->vport.caps->icm_address_rx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 01008cd66f75..67dea7698fc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -230,8 +230,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
(dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
- &dmn->info.caps,
- inner, rx);
+ dmn, inner, rx);
if (ret)
return ret;
}
@@ -458,13 +457,11 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
prev_matcher = NULL;
if (next_matcher && !first)
- prev_matcher = list_entry(next_matcher->matcher_list.prev,
- struct mlx5dr_matcher,
- matcher_list);
+ prev_matcher = list_prev_entry(next_matcher, matcher_list);
else if (!first)
- prev_matcher = list_entry(tbl->matcher_list.prev,
- struct mlx5dr_matcher,
- matcher_list);
+ prev_matcher = list_last_entry(&tbl->matcher_list,
+ struct mlx5dr_matcher,
+ matcher_list);
if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 3bc3f66b8fa8..4187f2b112b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -18,7 +18,7 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
struct mlx5dr_ste *last_ste;
/* The new entry will be inserted after the last */
- last_ste = list_entry(miss_list->prev, struct mlx5dr_ste, miss_list_node);
+ last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node);
WARN_ON(!last_ste);
ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 6b0af64536d8..4efe1b0be4a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -429,12 +429,9 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
struct mlx5dr_ste *prev_ste;
u64 miss_addr;
- prev_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->prev, struct mlx5dr_ste,
- miss_list_node);
- if (!prev_ste) {
- WARN_ON(true);
+ prev_ste = list_prev_entry(ste, miss_list_node);
+ if (WARN_ON(!prev_ste))
return;
- }
miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste);
mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr);
@@ -461,8 +458,8 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
struct mlx5dr_ste_htbl *stats_tbl;
LIST_HEAD(send_ste_list);
- first_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->next,
- struct mlx5dr_ste, miss_list_node);
+ first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste),
+ struct mlx5dr_ste, miss_list_node);
stats_tbl = first_ste->htbl;
/* Two options:
@@ -479,8 +476,7 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste,
if (last_ste == first_ste)
next_ste = NULL;
else
- next_ste = list_entry(ste->miss_list_node.next,
- struct mlx5dr_ste, miss_list_node);
+ next_ste = list_next_entry(ste, miss_list_node);
if (!next_ste) {
/* One and only entry in the list */
@@ -841,6 +837,8 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn);
spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port);
+ spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask,
+ source_eswitch_owner_vhca_id);
spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio);
spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi);
@@ -2254,11 +2252,18 @@ static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
{
struct mlx5dr_match_misc *misc_mask = &value->misc;
- if (misc_mask->source_port != 0xffff)
+ /* Partial misc source_port is not supported */
+ if (misc_mask->source_port && misc_mask->source_port != 0xffff)
+ return -EINVAL;
+
+ /* Partial misc source_eswitch_owner_vhca_id is not supported */
+ if (misc_mask->source_eswitch_owner_vhca_id &&
+ misc_mask->source_eswitch_owner_vhca_id != 0xffff)
return -EINVAL;
DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+ misc_mask->source_eswitch_owner_vhca_id = 0;
return 0;
}
@@ -2270,17 +2275,33 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc *misc = &value->misc;
struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *dmn = sb->dmn;
+ struct mlx5dr_cmd_caps *caps;
u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
- vport_cap = mlx5dr_get_vport_cap(sb->caps, misc->source_port);
+ if (sb->vhca_id_valid) {
+ /* Find port GVMI based on the eswitch_owner_vhca_id */
+ if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+ caps = &dmn->info.caps;
+ else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+ dmn->peer_dmn->info.caps.gvmi))
+ caps = &dmn->peer_dmn->info.caps;
+ else
+ return -EINVAL;
+ } else {
+ caps = &dmn->info.caps;
+ }
+
+ vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port);
if (!vport_cap)
return -EINVAL;
if (vport_cap->vport_gvmi)
MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
+ misc->source_eswitch_owner_vhca_id = 0;
misc->source_port = 0;
return 0;
@@ -2288,17 +2309,20 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
- struct mlx5dr_cmd_caps *caps,
+ struct mlx5dr_domain *dmn,
bool inner, bool rx)
{
int ret;
+ /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
+ sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
+
ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
if (ret)
return ret;
sb->rx = rx;
- sb->caps = caps;
+ sb->dmn = dmn;
sb->inner = inner;
sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index a37ee6359be2..1cb3769d4e3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -180,6 +180,8 @@ void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
struct mlx5dr_ste_build {
u8 inner:1;
u8 rx:1;
+ u8 vhca_id_valid:1;
+ struct mlx5dr_domain *dmn;
struct mlx5dr_cmd_caps *caps;
u8 lu_type;
u16 byte_mask;
@@ -331,7 +333,7 @@ void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
bool inner, bool rx);
int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
- struct mlx5dr_cmd_caps *caps,
+ struct mlx5dr_domain *dmn,
bool inner, bool rx);
void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
@@ -453,7 +455,7 @@ struct mlx5dr_match_misc {
u32 gre_c_present:1;
/* Source port.;0xffff determines wire port */
u32 source_port:16;
- u32 reserved_auto2:16;
+ u32 source_eswitch_owner_vhca_id:16;
/* VLAN ID of first VLAN tag the inner header of the incoming packet.
* Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
*/
@@ -745,7 +747,6 @@ struct mlx5dr_action {
struct {
struct mlx5dr_domain *dmn;
struct mlx5dr_cmd_vport_cap *caps;
- u32 num;
} vport;
struct {
u32 vlan_hdr; /* tpid_pcp_dei_vid */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dd234cf7b39d..dcf9562bce8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3771,6 +3771,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
goto err_port_qdiscs_init;
}
+ err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 0, VLAN_N_VID - 1, false,
+ false);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to clear VLAN filter\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_vlan_clear;
+ }
+
err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
@@ -3818,6 +3826,7 @@ err_port_vlan_create:
err_port_pvid_set:
mlxsw_sp_port_nve_fini(mlxsw_sp_port);
err_port_nve_init:
+err_port_vlan_clear:
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
err_port_qdiscs_init:
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 0ad1a24abfc6..b607919c8ad0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -21,6 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
struct netlink_ext_ack *extack)
{
const struct flow_action_entry *act;
+ int mirror_act_count = 0;
int err, i;
if (!flow_action_has_entries(flow_action))
@@ -105,6 +106,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
case FLOW_ACTION_MIRRED: {
struct net_device *out_dev = act->dev;
+ if (mirror_act_count++) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported");
+ return -EOPNOTSUPP;
+ }
+
err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
block, out_dev,
extack);
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c
index 23ebddfb9532..9f8a1f69c0c4 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -176,8 +176,10 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
u8 mask, val;
int err;
- if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack))
+ if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) {
+ err = -EOPNOTSUPP;
goto err_delete;
+ }
tos_off = proto == htons(ETH_P_IP) ? 16 : 20;
@@ -198,14 +200,18 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
if ((iter->val & cmask) == (val & cmask) &&
iter->band != knode->res->classid) {
NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter");
+ err = -EOPNOTSUPP;
goto err_delete;
}
}
if (!match) {
match = kzalloc(sizeof(*match), GFP_KERNEL);
- if (!match)
- return -ENOMEM;
+ if (!match) {
+ err = -ENOMEM;
+ goto err_delete;
+ }
+
list_add(&match->list, &alink->dscp_map);
}
match->handle = knode->handle;
@@ -221,7 +227,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
err_delete:
nfp_abm_u32_knode_delete(alink, knode);
- return -EOPNOTSUPP;
+ return err;
}
static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 7a20447cca19..d8ad9346a26a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -400,6 +400,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
if (!repr_priv) {
err = -ENOMEM;
+ nfp_repr_free(repr);
goto err_reprs_clean;
}
@@ -413,6 +414,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
port = nfp_port_alloc(app, port_type, repr);
if (IS_ERR(port)) {
err = PTR_ERR(port);
+ kfree(repr_priv);
nfp_repr_free(repr);
goto err_reprs_clean;
}
@@ -433,6 +435,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
err = nfp_repr_init(app, repr,
port_id, port, priv->nn->dp.netdev);
if (err) {
+ kfree(repr_priv);
nfp_port_free(port);
nfp_repr_free(repr);
goto err_reprs_clean;
@@ -515,6 +518,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
if (!repr_priv) {
err = -ENOMEM;
+ nfp_repr_free(repr);
goto err_reprs_clean;
}
@@ -525,11 +529,13 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
if (IS_ERR(port)) {
err = PTR_ERR(port);
+ kfree(repr_priv);
nfp_repr_free(repr);
goto err_reprs_clean;
}
err = nfp_port_init_phy_port(app->pf, app, port, i);
if (err) {
+ kfree(repr_priv);
nfp_port_free(port);
nfp_repr_free(repr);
goto err_reprs_clean;
@@ -542,6 +548,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
err = nfp_repr_init(app, repr,
cmsg_port_id, port, priv->nn->dp.netdev);
if (err) {
+ kfree(repr_priv);
nfp_port_free(port);
nfp_repr_free(repr);
goto err_reprs_clean;
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
deleted file mode 100644
index cf6e7eb1b1e1..000000000000
--- a/drivers/net/ethernet/netx-eth.c
+++ /dev/null
@@ -1,497 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * drivers/net/ethernet/netx-eth.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/mii.h>
-
-#include <asm/io.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-#include <mach/pfifo.h>
-#include <mach/xc.h>
-#include <linux/platform_data/eth-netx.h>
-
-/* XC Fifo Offsets */
-#define EMPTY_PTR_FIFO(xcno) (0 + ((xcno) << 3)) /* Index of the empty pointer FIFO */
-#define IND_FIFO_PORT_HI(xcno) (1 + ((xcno) << 3)) /* Index of the FIFO where received */
- /* Data packages are indicated by XC */
-#define IND_FIFO_PORT_LO(xcno) (2 + ((xcno) << 3)) /* Index of the FIFO where received */
- /* Data packages are indicated by XC */
-#define REQ_FIFO_PORT_HI(xcno) (3 + ((xcno) << 3)) /* Index of the FIFO where Data packages */
- /* have to be indicated by ARM which */
- /* shall be sent */
-#define REQ_FIFO_PORT_LO(xcno) (4 + ((xcno) << 3)) /* Index of the FIFO where Data packages */
- /* have to be indicated by ARM which shall */
- /* be sent */
-#define CON_FIFO_PORT_HI(xcno) (5 + ((xcno) << 3)) /* Index of the FIFO where sent Data packages */
- /* are confirmed */
-#define CON_FIFO_PORT_LO(xcno) (6 + ((xcno) << 3)) /* Index of the FIFO where sent Data */
- /* packages are confirmed */
-#define PFIFO_MASK(xcno) (0x7f << (xcno*8))
-
-#define FIFO_PTR_FRAMELEN_SHIFT 0
-#define FIFO_PTR_FRAMELEN_MASK (0x7ff << 0)
-#define FIFO_PTR_FRAMELEN(len) (((len) << 0) & FIFO_PTR_FRAMELEN_MASK)
-#define FIFO_PTR_TIMETRIG (1<<11)
-#define FIFO_PTR_MULTI_REQ
-#define FIFO_PTR_ORIGIN (1<<14)
-#define FIFO_PTR_VLAN (1<<15)
-#define FIFO_PTR_FRAMENO_SHIFT 16
-#define FIFO_PTR_FRAMENO_MASK (0x3f << 16)
-#define FIFO_PTR_FRAMENO(no) (((no) << 16) & FIFO_PTR_FRAMENO_MASK)
-#define FIFO_PTR_SEGMENT_SHIFT 22
-#define FIFO_PTR_SEGMENT_MASK (0xf << 22)
-#define FIFO_PTR_SEGMENT(seg) (((seg) & 0xf) << 22)
-#define FIFO_PTR_ERROR_SHIFT 28
-#define FIFO_PTR_ERROR_MASK (0xf << 28)
-
-#define ISR_LINK_STATUS_CHANGE (1<<4)
-#define ISR_IND_LO (1<<3)
-#define ISR_CON_LO (1<<2)
-#define ISR_IND_HI (1<<1)
-#define ISR_CON_HI (1<<0)
-
-#define ETH_MAC_LOCAL_CONFIG 0x1560
-#define ETH_MAC_4321 0x1564
-#define ETH_MAC_65 0x1568
-
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT 16
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK (0xf<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT)
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT(x) (((x)<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT) & MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK)
-#define LOCAL_CONFIG_LINK_STATUS_IRQ_EN (1<<24)
-#define LOCAL_CONFIG_CON_LO_IRQ_EN (1<<23)
-#define LOCAL_CONFIG_CON_HI_IRQ_EN (1<<22)
-#define LOCAL_CONFIG_IND_LO_IRQ_EN (1<<21)
-#define LOCAL_CONFIG_IND_HI_IRQ_EN (1<<20)
-
-#define CARDNAME "netx-eth"
-
-/* LSB must be zero */
-#define INTERNAL_PHY_ADR 0x1c
-
-struct netx_eth_priv {
- void __iomem *sram_base, *xpec_base, *xmac_base;
- int id;
- struct mii_if_info mii;
- u32 msg_enable;
- struct xc *xc;
- spinlock_t lock;
-};
-
-static void netx_eth_set_multicast_list(struct net_device *ndev)
-{
- /* implement me */
-}
-
-static int
-netx_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
- unsigned char *buf = skb->data;
- unsigned int len = skb->len;
-
- spin_lock_irq(&priv->lock);
- memcpy_toio(priv->sram_base + 1560, (void *)buf, len);
- if (len < 60) {
- memset_io(priv->sram_base + 1560 + len, 0, 60 - len);
- len = 60;
- }
-
- pfifo_push(REQ_FIFO_PORT_LO(priv->id),
- FIFO_PTR_SEGMENT(priv->id) |
- FIFO_PTR_FRAMENO(1) |
- FIFO_PTR_FRAMELEN(len));
-
- ndev->stats.tx_packets++;
- ndev->stats.tx_bytes += skb->len;
-
- netif_stop_queue(ndev);
- spin_unlock_irq(&priv->lock);
- dev_kfree_skb(skb);
-
- return NETDEV_TX_OK;
-}
-
-static void netx_eth_receive(struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
- unsigned int val, frameno, seg, len;
- unsigned char *data;
- struct sk_buff *skb;
-
- val = pfifo_pop(IND_FIFO_PORT_LO(priv->id));
-
- frameno = (val & FIFO_PTR_FRAMENO_MASK) >> FIFO_PTR_FRAMENO_SHIFT;
- seg = (val & FIFO_PTR_SEGMENT_MASK) >> FIFO_PTR_SEGMENT_SHIFT;
- len = (val & FIFO_PTR_FRAMELEN_MASK) >> FIFO_PTR_FRAMELEN_SHIFT;
-
- skb = netdev_alloc_skb(ndev, len);
- if (unlikely(skb == NULL)) {
- ndev->stats.rx_dropped++;
- return;
- }
-
- data = skb_put(skb, len);
-
- memcpy_fromio(data, priv->sram_base + frameno * 1560, len);
-
- pfifo_push(EMPTY_PTR_FIFO(priv->id),
- FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
-
- skb->protocol = eth_type_trans(skb, ndev);
- netif_rx(skb);
- ndev->stats.rx_packets++;
- ndev->stats.rx_bytes += len;
-}
-
-static irqreturn_t
-netx_eth_interrupt(int irq, void *dev_id)
-{
- struct net_device *ndev = dev_id;
- struct netx_eth_priv *priv = netdev_priv(ndev);
- int status;
- unsigned long flags;
-
- spin_lock_irqsave(&priv->lock, flags);
-
- status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
- while (status) {
- int fill_level;
- writel(status, NETX_PFIFO_XPEC_ISR(priv->id));
-
- if ((status & ISR_CON_HI) || (status & ISR_IND_HI))
- printk("%s: unexpected status: 0x%08x\n",
- __func__, status);
-
- fill_level =
- readl(NETX_PFIFO_FILL_LEVEL(IND_FIFO_PORT_LO(priv->id)));
- while (fill_level--)
- netx_eth_receive(ndev);
-
- if (status & ISR_CON_LO)
- netif_wake_queue(ndev);
-
- if (status & ISR_LINK_STATUS_CHANGE)
- mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-
- status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
- }
- spin_unlock_irqrestore(&priv->lock, flags);
- return IRQ_HANDLED;
-}
-
-static int netx_eth_open(struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
-
- if (request_irq
- (ndev->irq, netx_eth_interrupt, IRQF_SHARED, ndev->name, ndev))
- return -EAGAIN;
-
- writel(ndev->dev_addr[0] |
- ndev->dev_addr[1]<<8 |
- ndev->dev_addr[2]<<16 |
- ndev->dev_addr[3]<<24,
- priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
- writel(ndev->dev_addr[4] |
- ndev->dev_addr[5]<<8,
- priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
- writel(LOCAL_CONFIG_LINK_STATUS_IRQ_EN |
- LOCAL_CONFIG_CON_LO_IRQ_EN |
- LOCAL_CONFIG_CON_HI_IRQ_EN |
- LOCAL_CONFIG_IND_LO_IRQ_EN |
- LOCAL_CONFIG_IND_HI_IRQ_EN,
- priv->xpec_base + NETX_XPEC_RAM_START_OFS +
- ETH_MAC_LOCAL_CONFIG);
-
- mii_check_media(&priv->mii, netif_msg_link(priv), 1);
- netif_start_queue(ndev);
-
- return 0;
-}
-
-static int netx_eth_close(struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
-
- netif_stop_queue(ndev);
-
- writel(0,
- priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_LOCAL_CONFIG);
-
- free_irq(ndev->irq, ndev);
-
- return 0;
-}
-
-static void netx_eth_timeout(struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
- int i;
-
- printk(KERN_ERR "%s: transmit timed out, resetting\n", ndev->name);
-
- spin_lock_irq(&priv->lock);
-
- xc_reset(priv->xc);
- xc_start(priv->xc);
-
- for (i=2; i<=18; i++)
- pfifo_push(EMPTY_PTR_FIFO(priv->id),
- FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
- spin_unlock_irq(&priv->lock);
-
- netif_wake_queue(ndev);
-}
-
-static int
-netx_eth_phy_read(struct net_device *ndev, int phy_id, int reg)
-{
- unsigned int val;
-
- val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
- MIIMU_REGADDR(reg) | MIIMU_PHY_NRES;
-
- writel(val, NETX_MIIMU);
- while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-
- return readl(NETX_MIIMU) >> 16;
-
-}
-
-static void
-netx_eth_phy_write(struct net_device *ndev, int phy_id, int reg, int value)
-{
- unsigned int val;
-
- val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
- MIIMU_REGADDR(reg) | MIIMU_PHY_NRES | MIIMU_OPMODE_WRITE |
- MIIMU_DATA(value);
-
- writel(val, NETX_MIIMU);
- while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-}
-
-static const struct net_device_ops netx_eth_netdev_ops = {
- .ndo_open = netx_eth_open,
- .ndo_stop = netx_eth_close,
- .ndo_start_xmit = netx_eth_hard_start_xmit,
- .ndo_tx_timeout = netx_eth_timeout,
- .ndo_set_rx_mode = netx_eth_set_multicast_list,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
-};
-
-static int netx_eth_enable(struct net_device *ndev)
-{
- struct netx_eth_priv *priv = netdev_priv(ndev);
- unsigned int mac4321, mac65;
- int running, i, ret;
- bool inv_mac_addr = false;
-
- ndev->netdev_ops = &netx_eth_netdev_ops;
- ndev->watchdog_timeo = msecs_to_jiffies(5000);
-
- priv->msg_enable = NETIF_MSG_LINK;
- priv->mii.phy_id_mask = 0x1f;
- priv->mii.reg_num_mask = 0x1f;
- priv->mii.force_media = 0;
- priv->mii.full_duplex = 0;
- priv->mii.dev = ndev;
- priv->mii.mdio_read = netx_eth_phy_read;
- priv->mii.mdio_write = netx_eth_phy_write;
- priv->mii.phy_id = INTERNAL_PHY_ADR + priv->id;
-
- running = xc_running(priv->xc);
- xc_stop(priv->xc);
-
- /* if the xc engine is already running, assume the bootloader has
- * loaded the firmware for us
- */
- if (running) {
- /* get Node Address from hardware */
- mac4321 = readl(priv->xpec_base +
- NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
- mac65 = readl(priv->xpec_base +
- NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
- ndev->dev_addr[0] = mac4321 & 0xff;
- ndev->dev_addr[1] = (mac4321 >> 8) & 0xff;
- ndev->dev_addr[2] = (mac4321 >> 16) & 0xff;
- ndev->dev_addr[3] = (mac4321 >> 24) & 0xff;
- ndev->dev_addr[4] = mac65 & 0xff;
- ndev->dev_addr[5] = (mac65 >> 8) & 0xff;
- } else {
- if (xc_request_firmware(priv->xc)) {
- printk(CARDNAME ": requesting firmware failed\n");
- return -ENODEV;
- }
- }
-
- xc_reset(priv->xc);
- xc_start(priv->xc);
-
- if (!is_valid_ether_addr(ndev->dev_addr))
- inv_mac_addr = true;
-
- for (i=2; i<=18; i++)
- pfifo_push(EMPTY_PTR_FIFO(priv->id),
- FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
- ret = register_netdev(ndev);
- if (inv_mac_addr)
- printk("%s: Invalid ethernet MAC address. Please set using ip\n",
- ndev->name);
-
- return ret;
-}
-
-static int netx_eth_drv_probe(struct platform_device *pdev)
-{
- struct netx_eth_priv *priv;
- struct net_device *ndev;
- struct netxeth_platform_data *pdata;
- int ret;
-
- ndev = alloc_etherdev(sizeof (struct netx_eth_priv));
- if (!ndev) {
- ret = -ENOMEM;
- goto exit;
- }
- SET_NETDEV_DEV(ndev, &pdev->dev);
-
- platform_set_drvdata(pdev, ndev);
-
- priv = netdev_priv(ndev);
-
- pdata = dev_get_platdata(&pdev->dev);
- priv->xc = request_xc(pdata->xcno, &pdev->dev);
- if (!priv->xc) {
- dev_err(&pdev->dev, "unable to request xc engine\n");
- ret = -ENODEV;
- goto exit_free_netdev;
- }
-
- ndev->irq = priv->xc->irq;
- priv->id = pdev->id;
- priv->xpec_base = priv->xc->xpec_base;
- priv->xmac_base = priv->xc->xmac_base;
- priv->sram_base = priv->xc->sram_base;
-
- spin_lock_init(&priv->lock);
-
- ret = pfifo_request(PFIFO_MASK(priv->id));
- if (ret) {
- printk("unable to request PFIFO\n");
- goto exit_free_xc;
- }
-
- ret = netx_eth_enable(ndev);
- if (ret)
- goto exit_free_pfifo;
-
- return 0;
-exit_free_pfifo:
- pfifo_free(PFIFO_MASK(priv->id));
-exit_free_xc:
- free_xc(priv->xc);
-exit_free_netdev:
- free_netdev(ndev);
-exit:
- return ret;
-}
-
-static int netx_eth_drv_remove(struct platform_device *pdev)
-{
- struct net_device *ndev = platform_get_drvdata(pdev);
- struct netx_eth_priv *priv = netdev_priv(ndev);
-
- unregister_netdev(ndev);
- xc_stop(priv->xc);
- free_xc(priv->xc);
- free_netdev(ndev);
- pfifo_free(PFIFO_MASK(priv->id));
-
- return 0;
-}
-
-static int netx_eth_drv_suspend(struct platform_device *pdev, pm_message_t state)
-{
- dev_err(&pdev->dev, "suspend not implemented\n");
- return 0;
-}
-
-static int netx_eth_drv_resume(struct platform_device *pdev)
-{
- dev_err(&pdev->dev, "resume not implemented\n");
- return 0;
-}
-
-static struct platform_driver netx_eth_driver = {
- .probe = netx_eth_drv_probe,
- .remove = netx_eth_drv_remove,
- .suspend = netx_eth_drv_suspend,
- .resume = netx_eth_drv_resume,
- .driver = {
- .name = CARDNAME,
- },
-};
-
-static int __init netx_eth_init(void)
-{
- unsigned int phy_control, val;
-
- printk("NetX Ethernet driver\n");
-
- phy_control = PHY_CONTROL_PHY_ADDRESS(INTERNAL_PHY_ADR>>1) |
- PHY_CONTROL_PHY1_MODE(PHY_MODE_ALL) |
- PHY_CONTROL_PHY1_AUTOMDIX |
- PHY_CONTROL_PHY1_EN |
- PHY_CONTROL_PHY0_MODE(PHY_MODE_ALL) |
- PHY_CONTROL_PHY0_AUTOMDIX |
- PHY_CONTROL_PHY0_EN |
- PHY_CONTROL_CLK_XLATIN;
-
- val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
- writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
- writel(phy_control | PHY_CONTROL_RESET, NETX_SYSTEM_PHY_CONTROL);
- udelay(100);
-
- val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
- writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
- writel(phy_control, NETX_SYSTEM_PHY_CONTROL);
-
- return platform_driver_register(&netx_eth_driver);
-}
-
-static void __exit netx_eth_cleanup(void)
-{
- platform_driver_unregister(&netx_eth_driver);
-}
-
-module_init(netx_eth_init);
-module_exit(netx_eth_cleanup);
-
-MODULE_AUTHOR("Sascha Hauer, Pengutronix");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" CARDNAME);
-MODULE_FIRMWARE("xc0.bin");
-MODULE_FIRMWARE("xc1.bin");
-MODULE_FIRMWARE("xc2.bin");
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 0b384f97d2fd..2761f3a3ae50 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1347,7 +1347,7 @@ static int nixge_probe(struct platform_device *pdev)
}
priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
- if (priv->phy_mode < 0) {
+ if ((int)priv->phy_mode < 0) {
netdev_err(ndev, "not find \"phy-mode\" property\n");
err = -EINVAL;
goto unregister_mdio;
diff --git a/drivers/net/ethernet/nxp/Kconfig b/drivers/net/ethernet/nxp/Kconfig
index 418afb84c84b..ee83a71c2509 100644
--- a/drivers/net/ethernet/nxp/Kconfig
+++ b/drivers/net/ethernet/nxp/Kconfig
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
config LPC_ENET
- tristate "NXP ethernet MAC on LPC devices"
- depends on ARCH_LPC32XX || COMPILE_TEST
- select PHYLIB
- help
+ tristate "NXP ethernet MAC on LPC devices"
+ depends on ARCH_LPC32XX || COMPILE_TEST
+ select PHYLIB
+ help
Say Y or M here if you want to use the NXP ethernet MAC included on
some NXP LPC devices. You can safely enable this option for LPC32xx
SoC. Also available as a module.
diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
index 5ea570be8379..bd0583e409df 100644
--- a/drivers/net/ethernet/pensando/Kconfig
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -26,7 +26,7 @@ config IONIC
found in
<file:Documentation/networking/device_drivers/pensando/ionic.rst>.
- To compile this driver as a module, choose M here. The module
- will be called ionic.
+ To compile this driver as a module, choose M here. The module
+ will be called ionic.
endif # NET_VENDOR_PENSANDO
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
index 7afc4a365b75..bc03cecf80cc 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -57,7 +57,7 @@ DEFINE_SHOW_ATTRIBUTE(identity);
void ionic_debugfs_add_ident(struct ionic *ionic)
{
debugfs_create_file("identity", 0400, ionic->dentry,
- ionic, &identity_fops) ? 0 : -EOPNOTSUPP;
+ ionic, &identity_fops);
}
void ionic_debugfs_add_sizes(struct ionic *ionic)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index db7c82742828..72107a0627a9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1704,6 +1704,7 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
GFP_KERNEL);
if (!lif->rss_ind_tbl) {
+ err = -ENOMEM;
dev_err(dev, "Failed to allocate rss indirection table, aborting\n");
goto err_out_free_qcqs;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 0ae28f0d2523..004c0bfec41d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -779,8 +779,7 @@ qede_rx_build_skb(struct qede_dev *edev,
return NULL;
skb_reserve(skb, pad);
- memcpy(skb_put(skb, len),
- page_address(bd->data) + offset, len);
+ skb_put_data(skb, page_address(bd->data) + offset, len);
qede_reuse_page(rxq, bd);
goto out;
}
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 1502fe8b0456..55db7fbd43cc 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -2007,7 +2007,7 @@ static int netsec_probe(struct platform_device *pdev)
NETIF_MSG_LINK | NETIF_MSG_PROBE;
priv->phy_interface = device_get_phy_mode(&pdev->dev);
- if (priv->phy_interface < 0) {
+ if ((int)priv->phy_interface < 0) {
dev_err(&pdev->dev, "missing required property 'phy-mode'\n");
ret = -ENODEV;
goto free_ndev;
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 10d0c3e478ab..6e984d5a729f 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1566,7 +1566,7 @@ static int ave_probe(struct platform_device *pdev)
np = dev->of_node;
phy_mode = of_get_phy_mode(np);
- if (phy_mode < 0) {
+ if ((int)phy_mode < 0) {
dev_err(dev, "phy-mode not found\n");
return -EINVAL;
}
@@ -1662,19 +1662,19 @@ static int ave_probe(struct platform_device *pdev)
"socionext,syscon-phy-mode",
1, 0, &args);
if (ret) {
- netdev_err(ndev, "can't get syscon-phy-mode property\n");
+ dev_err(dev, "can't get syscon-phy-mode property\n");
goto out_free_netdev;
}
priv->regmap = syscon_node_to_regmap(args.np);
of_node_put(args.np);
if (IS_ERR(priv->regmap)) {
- netdev_err(ndev, "can't map syscon-phy-mode\n");
+ dev_err(dev, "can't map syscon-phy-mode\n");
ret = PTR_ERR(priv->regmap);
goto out_free_netdev;
}
ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]);
if (ret) {
- netdev_err(ndev, "invalid phy-mode setting\n");
+ dev_err(dev, "invalid phy-mode setting\n");
goto out_free_netdev;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 2c6d7c69c8f7..0d21082ceb93 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -191,7 +191,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
struct device *dev = &gmac->pdev->dev;
gmac->phy_mode = of_get_phy_mode(dev->of_node);
- if (gmac->phy_mode < 0) {
+ if ((int)gmac->phy_mode < 0) {
dev_err(dev, "missing phy mode property\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 9cda29e4b89d..306da8f6b7d5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -339,7 +339,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
dwmac->dev = &pdev->dev;
dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
- if (dwmac->phy_mode < 0) {
+ if ((int)dwmac->phy_mode < 0) {
dev_err(&pdev->dev, "missing phy-mode property\n");
ret = -EINVAL;
goto err_remove_config_dt;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index d5173dd02a71..2b277b2c586b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -523,19 +523,18 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw,
struct stmmac_rss *cfg, u32 num_rxq)
{
void __iomem *ioaddr = hw->pcsr;
- u32 *key = (u32 *)cfg->key;
int i, ret;
u32 value;
value = readl(ioaddr + XGMAC_RSS_CTRL);
- if (!cfg->enable) {
+ if (!cfg || !cfg->enable) {
value &= ~XGMAC_RSSE;
writel(value, ioaddr + XGMAC_RSS_CTRL);
return 0;
}
for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) {
- ret = dwxgmac2_rss_write_reg(ioaddr, true, i, *key++);
+ ret = dwxgmac2_rss_write_reg(ioaddr, true, i, cfg->key[i]);
if (ret)
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a6cb2aa60e64..d3232738fb25 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1557,13 +1557,15 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
for (queue = 0; queue < rx_count; queue++) {
struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
struct page_pool_params pp_params = { 0 };
+ unsigned int num_pages;
rx_q->queue_index = queue;
rx_q->priv_data = priv;
pp_params.flags = PP_FLAG_DMA_MAP;
pp_params.pool_size = DMA_RX_SIZE;
- pp_params.order = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+ num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+ pp_params.order = ilog2(num_pages);
pp_params.nid = dev_to_node(priv->device);
pp_params.dev = priv->device;
pp_params.dma_dir = DMA_FROM_DEVICE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index c56e89e1ae56..5f66f6161629 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -670,7 +670,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv)
unsigned int pkt_count;
int i, ret = 0;
- if (!phydev || !phydev->pause)
+ if (!phydev || (!phydev->pause && !phydev->asym_pause))
return -EOPNOTSUPP;
tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
@@ -1233,12 +1233,9 @@ static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
return -EOPNOTSUPP;
if (!priv->dma_cap.l3l4fnum)
return -EOPNOTSUPP;
- if (priv->rss.enable) {
- struct stmmac_rss rss = { .enable = false, };
-
- stmmac_rss_configure(priv, priv->hw, &rss,
+ if (priv->rss.enable)
+ stmmac_rss_configure(priv, priv->hw, NULL,
priv->plat->rx_queues_to_use);
- }
dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
if (!dissector) {
@@ -1357,12 +1354,9 @@ static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
return -EOPNOTSUPP;
if (!priv->dma_cap.l3l4fnum)
return -EOPNOTSUPP;
- if (priv->rss.enable) {
- struct stmmac_rss rss = { .enable = false, };
-
- stmmac_rss_configure(priv, priv->hw, &rss,
+ if (priv->rss.enable)
+ stmmac_rss_configure(priv, priv->hw, NULL,
priv->plat->rx_queues_to_use);
- }
dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
if (!dissector) {
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 4fc627fb4d11..676006f32f91 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1762,7 +1762,7 @@ static int axienet_probe(struct platform_device *pdev)
}
} else {
lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
- if (lp->phy_mode < 0) {
+ if ((int)lp->phy_mode < 0) {
ret = -EINVAL;
goto free_netdev;
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 8f46aa1ddec0..cb7637364b40 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1235,6 +1235,7 @@ deliver:
macsec_rxsa_put(rx_sa);
macsec_rxsc_put(rx_sc);
+ skb_orphan(skb);
ret = gro_cells_receive(&macsec->gro_cells, skb);
if (ret == NET_RX_SUCCESS)
count_rx(dev, skb->len);
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 03be30cde552..fe602648b99f 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -460,9 +460,9 @@ config RENESAS_PHY
Supports the Renesas PHYs uPD60620 and uPD60620A.
config ROCKCHIP_PHY
- tristate "Driver for Rockchip Ethernet PHYs"
- ---help---
- Currently supports the integrated Ethernet PHY.
+ tristate "Driver for Rockchip Ethernet PHYs"
+ ---help---
+ Currently supports the integrated Ethernet PHY.
config SMSC_PHY
tristate "SMSC PHYs"
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3c8186f269f9..2fea5541c35a 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -763,6 +763,8 @@ static int ksz9031_get_features(struct phy_device *phydev)
* Whenever the device's Asymmetric Pause capability is set to 1,
* link-up may fail after a link-up to link-down transition.
*
+ * The Errata Sheet is for ksz9031, but ksz9021 has the same issue
+ *
* Workaround:
* Do not enable the Asymmetric Pause capability bit.
*/
@@ -1076,6 +1078,7 @@ static struct phy_driver ksphy_driver[] = {
/* PHY_GBIT_FEATURES */
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
+ .get_features = ksz9031_get_features,
.config_init = ksz9021_config_init,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c
index a221dd552c3c..a5bf0874c7d8 100644
--- a/drivers/net/phy/national.c
+++ b/drivers/net/phy/national.c
@@ -105,14 +105,17 @@ static void ns_giga_speed_fallback(struct phy_device *phydev, int mode)
static void ns_10_base_t_hdx_loopack(struct phy_device *phydev, int disable)
{
+ u16 lb_dis = BIT(1);
+
if (disable)
- ns_exp_write(phydev, 0x1c0, ns_exp_read(phydev, 0x1c0) | 1);
+ ns_exp_write(phydev, 0x1c0,
+ ns_exp_read(phydev, 0x1c0) | lb_dis);
else
ns_exp_write(phydev, 0x1c0,
- ns_exp_read(phydev, 0x1c0) & 0xfffe);
+ ns_exp_read(phydev, 0x1c0) & ~lb_dis);
pr_debug("10BASE-T HDX loopback %s\n",
- (ns_exp_read(phydev, 0x1c0) & 0x0001) ? "off" : "on");
+ (ns_exp_read(phydev, 0x1c0) & lb_dis) ? "off" : "on");
}
static int ns_config_init(struct phy_device *phydev)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index a30e41a56085..9a1b006904a7 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1415,6 +1415,8 @@ static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
netif_wake_queue(ppp->dev);
else
netif_stop_queue(ppp->dev);
+ } else {
+ kfree_skb(skb);
}
ppp_xmit_unlock(ppp);
}
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index dd614c2cd994..3ae70c7e6860 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1200,7 +1200,7 @@ err_kfree:
kfree_skb(skb);
err:
rcu_read_lock();
- tap = rcu_dereference(q->tap);
+ tap = rcu_dereference(q->tap);
if (tap && tap->count_tx_dropped)
tap->count_tx_dropped(tap);
rcu_read_unlock();
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 50c05d0f44cb..00cab3f43a4c 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -681,8 +681,12 @@ cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf)
u8 ep;
for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) {
-
e = intf->cur_altsetting->endpoint + ep;
+
+ /* ignore endpoints which cannot transfer data */
+ if (!usb_endpoint_maxp(&e->desc))
+ continue;
+
switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
case USB_ENDPOINT_XFER_INT:
if (usb_endpoint_dir_in(&e->desc)) {
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 58952a79b05f..dde05e2fdc3e 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -100,6 +100,11 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf)
int intr = 0;
e = alt->endpoint + ep;
+
+ /* ignore endpoints which cannot transfer data */
+ if (!usb_endpoint_maxp(&e->desc))
+ continue;
+
switch (e->desc.bmAttributes) {
case USB_ENDPOINT_XFER_INT:
if (!usb_endpoint_dir_in(&e->desc))
@@ -339,6 +344,8 @@ void usbnet_update_max_qlen(struct usbnet *dev)
{
enum usb_device_speed speed = dev->udev->speed;
+ if (!dev->rx_urb_size || !dev->hard_mtu)
+ goto insanity;
switch (speed) {
case USB_SPEED_HIGH:
dev->rx_qlen = MAX_QUEUE_MEMORY / dev->rx_urb_size;
@@ -355,6 +362,7 @@ void usbnet_update_max_qlen(struct usbnet *dev)
dev->tx_qlen = 5 * MAX_QUEUE_MEMORY / dev->hard_mtu;
break;
default:
+insanity:
dev->rx_qlen = dev->tx_qlen = 4;
}
}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 6e84328bdd40..a4b38a980c3c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1154,7 +1154,8 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
struct sk_buff *skb;
int err;
- if (family == AF_INET6 && !ipv6_mod_enabled())
+ if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
+ !ipv6_mod_enabled())
return 0;
skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig
index d98d6ac90f3d..56616d988c96 100644
--- a/drivers/net/wireless/ath/Kconfig
+++ b/drivers/net/wireless/ath/Kconfig
@@ -34,7 +34,7 @@ config ATH_TRACEPOINTS
depends on ATH_DEBUG
depends on EVENT_TRACING
---help---
- This option enables tracepoints for atheros wireless drivers.
+ This option enables tracepoints for atheros wireless drivers.
Currently, ath9k makes use of this facility.
config ATH_REG_DYNAMIC_USER_REG_HINTS
diff --git a/drivers/net/wireless/ath/ar5523/Kconfig b/drivers/net/wireless/ath/ar5523/Kconfig
index 41d3c9a48b08..65b39c7d035d 100644
--- a/drivers/net/wireless/ath/ar5523/Kconfig
+++ b/drivers/net/wireless/ath/ar5523/Kconfig
@@ -5,5 +5,5 @@ config AR5523
select ATH_COMMON
select FW_LOADER
---help---
- This module add support for AR5523 based USB dongles such as D-Link
- DWL-G132, Netgear WPN111 and many more.
+ This module add support for AR5523 based USB dongles such as D-Link
+ DWL-G132, Netgear WPN111 and many more.
diff --git a/drivers/net/wireless/ath/ath6kl/Kconfig b/drivers/net/wireless/ath/ath6kl/Kconfig
index dcf8ca0dcc52..62c22fdcca38 100644
--- a/drivers/net/wireless/ath/ath6kl/Kconfig
+++ b/drivers/net/wireless/ath/ath6kl/Kconfig
@@ -2,7 +2,7 @@
config ATH6KL
tristate "Atheros mobile chipsets support"
depends on CFG80211
- ---help---
+ ---help---
This module adds core support for wireless adapters based on
Atheros AR6003 and AR6004 chipsets. You still need separate
bus drivers for USB and SDIO to be able to use real devices.
diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig
index 2d1247f61297..c99f42284465 100644
--- a/drivers/net/wireless/ath/ath9k/Kconfig
+++ b/drivers/net/wireless/ath/ath9k/Kconfig
@@ -148,7 +148,7 @@ config ATH9K_CHANNEL_CONTEXT
depends on ATH9K
default n
---help---
- This option enables channel context support in ath9k, which is needed
+ This option enables channel context support in ath9k, which is needed
for multi-channel concurrency. Enable this if P2P PowerSave support
is required.
diff --git a/drivers/net/wireless/ath/carl9170/Kconfig b/drivers/net/wireless/ath/carl9170/Kconfig
index 757eb765e17c..b1bce7aad399 100644
--- a/drivers/net/wireless/ath/carl9170/Kconfig
+++ b/drivers/net/wireless/ath/carl9170/Kconfig
@@ -41,9 +41,9 @@ config CARL9170_WPC
default y
config CARL9170_HWRNG
- bool "Random number generator"
- depends on CARL9170 && (HW_RANDOM = y || HW_RANDOM = CARL9170)
- default n
+ bool "Random number generator"
+ depends on CARL9170 && (HW_RANDOM = y || HW_RANDOM = CARL9170)
+ default n
help
Provides a hardware random number generator to the kernel.
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index cb13652491ad..598c1fba9dac 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -1012,11 +1012,11 @@ void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev)
skb_orphan(skb);
if (security && (wil->txrx_ops.rx_crypto_check(wil, skb) != 0)) {
+ wil_dbg_txrx(wil, "Rx drop %d bytes\n", skb->len);
dev_kfree_skb(skb);
ndev->stats.rx_dropped++;
stats->rx_replay++;
stats->rx_dropped++;
- wil_dbg_txrx(wil, "Rx drop %d bytes\n", skb->len);
return;
}
diff --git a/drivers/net/wireless/atmel/Kconfig b/drivers/net/wireless/atmel/Kconfig
index 809bdf331848..4c0556b3a5ba 100644
--- a/drivers/net/wireless/atmel/Kconfig
+++ b/drivers/net/wireless/atmel/Kconfig
@@ -20,22 +20,22 @@ config ATMEL
select FW_LOADER
select CRC32
---help---
- A driver 802.11b wireless cards based on the Atmel fast-vnet
- chips. This driver supports standard Linux wireless extensions.
+ A driver for 802.11b wireless cards based on the Atmel fast-vnet
+ chips. This driver supports standard Linux wireless extensions.
- Many cards based on this chipset do not have flash memory
- and need their firmware loaded at start-up. If yours is
- one of these, you will need to provide a firmware image
- to be loaded into the card by the driver. The Atmel
- firmware package can be downloaded from
- <http://www.thekelleys.org.uk/atmel>
+ Many cards based on this chipset do not have flash memory
+ and need their firmware loaded at start-up. If yours is
+ one of these, you will need to provide a firmware image
+ to be loaded into the card by the driver. The Atmel
+ firmware package can be downloaded from
+ <http://www.thekelleys.org.uk/atmel>
config PCI_ATMEL
tristate "Atmel at76c506 PCI cards"
depends on ATMEL && PCI
---help---
- Enable support for PCI and mini-PCI cards containing the
- Atmel at76c506 chip.
+ Enable support for PCI and mini-PCI cards containing the
+ Atmel at76c506 chip.
config PCMCIA_ATMEL
tristate "Atmel at76c502/at76c504 PCMCIA cards"
@@ -48,11 +48,11 @@ config PCMCIA_ATMEL
Atmel at76c502 and at76c504 chips.
config AT76C50X_USB
- tristate "Atmel at76c503/at76c505/at76c505a USB cards"
- depends on MAC80211 && USB
- select FW_LOADER
- ---help---
- Enable support for USB Wireless devices using Atmel at76c503,
- at76c505 or at76c505a chips.
+ tristate "Atmel at76c503/at76c505/at76c505a USB cards"
+ depends on MAC80211 && USB
+ select FW_LOADER
+ ---help---
+ Enable support for USB Wireless devices using Atmel at76c503,
+ at76c505 or at76c505a chips.
endif # WLAN_VENDOR_ATMEL
diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig
index 5d2878a73732..ab17903ba9f8 100644
--- a/drivers/net/wireless/intel/ipw2x00/Kconfig
+++ b/drivers/net/wireless/intel/ipw2x00/Kconfig
@@ -13,37 +13,37 @@ config IPW2100
select LIB80211
select LIBIPW
---help---
- A driver for the Intel PRO/Wireless 2100 Network
+ A driver for the Intel PRO/Wireless 2100 Network
Connection 802.11b wireless network adapter.
- See <file:Documentation/networking/device_drivers/intel/ipw2100.txt>
+ See <file:Documentation/networking/device_drivers/intel/ipw2100.txt>
for information on the capabilities currently enabled in this driver
and for tips for debugging issues and problems.
In order to use this driver, you will need a firmware image for it.
- You can obtain the firmware from
- <http://ipw2100.sf.net/>. Once you have the firmware image, you
+ You can obtain the firmware from
+ <http://ipw2100.sf.net/>. Once you have the firmware image, you
will need to place it in /lib/firmware.
- You will also very likely need the Wireless Tools in order to
- configure your card:
+ You will also very likely need the Wireless Tools in order to
+ configure your card:
- <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+ <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+
+ It is recommended that you compile this driver as a module (M)
+ rather than built-in (Y). This driver requires firmware at device
+ initialization time, and when built-in this typically happens
+ before the filesystem is accessible (hence firmware will be
+ unavailable and initialization will fail). If you do choose to build
+ this driver into your kernel image, you can avoid this problem by
+ including the firmware and a firmware loader in an initramfs.
- It is recommended that you compile this driver as a module (M)
- rather than built-in (Y). This driver requires firmware at device
- initialization time, and when built-in this typically happens
- before the filesystem is accessible (hence firmware will be
- unavailable and initialization will fail). If you do choose to build
- this driver into your kernel image, you can avoid this problem by
- including the firmware and a firmware loader in an initramfs.
-
config IPW2100_MONITOR
- bool "Enable promiscuous mode"
- depends on IPW2100
- ---help---
+ bool "Enable promiscuous mode"
+ depends on IPW2100
+ ---help---
Enables promiscuous/monitor mode support for the ipw2100 driver.
- With this feature compiled into the driver, you can switch to
+ With this feature compiled into the driver, you can switch to
promiscuous mode via the Wireless Tool's Monitor mode. While in this
mode, no packets can be sent.
@@ -51,17 +51,17 @@ config IPW2100_DEBUG
bool "Enable full debugging output in IPW2100 module."
depends on IPW2100
---help---
- This option will enable debug tracing output for the IPW2100.
+ This option will enable debug tracing output for the IPW2100.
- This will result in the kernel module being ~60k larger. You can
- control which debug output is sent to the kernel log by setting the
- value in
+ This will result in the kernel module being ~60k larger. You can
+ control which debug output is sent to the kernel log by setting the
+ value in
/sys/bus/pci/drivers/ipw2100/debug_level
This entry will only exist if this option is enabled.
- If you are not trying to debug or develop the IPW2100 driver, you
+ If you are not trying to debug or develop the IPW2100 driver, you
most likely want to say N here.
config IPW2200
@@ -75,37 +75,37 @@ config IPW2200
select LIB80211
select LIBIPW
---help---
- A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network
- Connection adapters.
+ A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network
+ Connection adapters.
- See <file:Documentation/networking/device_drivers/intel/ipw2200.txt>
+ See <file:Documentation/networking/device_drivers/intel/ipw2200.txt>
for information on the capabilities currently enabled in this
driver and for tips for debugging issues and problems.
In order to use this driver, you will need a firmware image for it.
- You can obtain the firmware from
- <http://ipw2200.sf.net/>. See the above referenced README.ipw2200
+ You can obtain the firmware from
+ <http://ipw2200.sf.net/>. See the above referenced README.ipw2200
for information on where to install the firmware images.
- You will also very likely need the Wireless Tools in order to
- configure your card:
+ You will also very likely need the Wireless Tools in order to
+ configure your card:
- <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+ <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
- It is recommended that you compile this driver as a module (M)
- rather than built-in (Y). This driver requires firmware at device
- initialization time, and when built-in this typically happens
- before the filesystem is accessible (hence firmware will be
- unavailable and initialization will fail). If you do choose to build
- this driver into your kernel image, you can avoid this problem by
- including the firmware and a firmware loader in an initramfs.
+ It is recommended that you compile this driver as a module (M)
+ rather than built-in (Y). This driver requires firmware at device
+ initialization time, and when built-in this typically happens
+ before the filesystem is accessible (hence firmware will be
+ unavailable and initialization will fail). If you do choose to build
+ this driver into your kernel image, you can avoid this problem by
+ including the firmware and a firmware loader in an initramfs.
config IPW2200_MONITOR
- bool "Enable promiscuous mode"
- depends on IPW2200
- ---help---
+ bool "Enable promiscuous mode"
+ depends on IPW2200
+ ---help---
Enables promiscuous/monitor mode support for the ipw2200 driver.
- With this feature compiled into the driver, you can switch to
+ With this feature compiled into the driver, you can switch to
promiscuous mode via the Wireless Tool's Monitor mode. While in this
mode, no packets can be sent.
@@ -118,28 +118,28 @@ config IPW2200_PROMISCUOUS
depends on IPW2200_MONITOR
select IPW2200_RADIOTAP
---help---
- Enables the creation of a second interface prefixed 'rtap'.
- This second interface will provide every received in radiotap
+ Enables the creation of a second interface prefixed 'rtap'.
+ This second interface will provide every received packet in radiotap
format.
- This is useful for performing wireless network analysis while
- maintaining an active association.
+ This is useful for performing wireless network analysis while
+ maintaining an active association.
+
+ Example usage:
- Example usage:
+ % modprobe ipw2200 rtap_iface=1
+ % ifconfig rtap0 up
+ % tethereal -i rtap0
- % modprobe ipw2200 rtap_iface=1
- % ifconfig rtap0 up
- % tethereal -i rtap0
+ If you do not specify 'rtap_iface=1' as a module parameter then
+ the rtap interface will not be created and you will need to turn
+ it on via sysfs:
- If you do not specify 'rtap_iface=1' as a module parameter then
- the rtap interface will not be created and you will need to turn
- it on via sysfs:
-
- % echo 1 > /sys/bus/pci/drivers/ipw2200/*/rtap_iface
+ % echo 1 > /sys/bus/pci/drivers/ipw2200/*/rtap_iface
config IPW2200_QOS
- bool "Enable QoS support"
- depends on IPW2200
+ bool "Enable QoS support"
+ depends on IPW2200
config IPW2200_DEBUG
bool "Enable full debugging output in IPW2200 module."
diff --git a/drivers/net/wireless/intel/iwlegacy/Kconfig b/drivers/net/wireless/intel/iwlegacy/Kconfig
index e329fd7b09c0..100f55858b13 100644
--- a/drivers/net/wireless/intel/iwlegacy/Kconfig
+++ b/drivers/net/wireless/intel/iwlegacy/Kconfig
@@ -91,9 +91,9 @@ config IWLEGACY_DEBUG
any problems you may encounter.
config IWLEGACY_DEBUGFS
- bool "iwlegacy (iwl 3945/4965) debugfs support"
- depends on IWLEGACY && MAC80211_DEBUGFS
- ---help---
+ bool "iwlegacy (iwl 3945/4965) debugfs support"
+ depends on IWLEGACY && MAC80211_DEBUGFS
+ ---help---
Enable creation of debugfs files for the iwlegacy drivers. This
is a low-impact option that allows getting insight into the
driver's state at runtime.
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index 7dbc0d38bb3b..091d621ad25f 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -119,9 +119,9 @@ config IWLWIFI_DEBUG
any problems you may encounter.
config IWLWIFI_DEBUGFS
- bool "iwlwifi debugfs support"
- depends on MAC80211_DEBUGFS
- ---help---
+ bool "iwlwifi debugfs support"
+ depends on MAC80211_DEBUGFS
+ ---help---
Enable creation of debugfs files for the iwlwifi drivers. This
is a low-impact option that allows getting insight into the
driver's state at runtime.
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 014eca6596e2..32a5e4e5461f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -889,11 +889,13 @@ static bool iwl_mvm_sar_geo_support(struct iwl_mvm *mvm)
* firmware versions. Unfortunately, we don't have a TLV API
* flag to rely on, so rely on the major version which is in
* the first byte of ucode_ver. This was implemented
- * initially on version 38 and then backported to 36, 29 and
- * 17.
+ * initially on version 38 and then backported to 29 and 17.
+ * The intention was to have it in 36 as well, but not all
+ * 8000 family got this feature enabled. The 8000 family is
+ * the only one using version 36, so skip this version
+ * entirely.
*/
return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 ||
- IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 36 ||
IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 ||
IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 32a708301cfc..f0c539b37ea7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -555,16 +555,19 @@ static int compare_temps(const void *a, const void *b)
return ((s16)le16_to_cpu(*(__le16 *)a) -
(s16)le16_to_cpu(*(__le16 *)b));
}
+#endif
int iwl_mvm_send_temp_report_ths_cmd(struct iwl_mvm *mvm)
{
struct temp_report_ths_cmd cmd = {0};
- int ret, i, j, idx = 0;
+ int ret;
+#ifdef CONFIG_THERMAL
+ int i, j, idx = 0;
lockdep_assert_held(&mvm->mutex);
if (!mvm->tz_device.tzone)
- return -EINVAL;
+ goto send;
/* The driver holds array of temperature trips that are unsorted
* and uncompressed, the FW should get it compressed and sorted
@@ -597,6 +600,7 @@ int iwl_mvm_send_temp_report_ths_cmd(struct iwl_mvm *mvm)
}
send:
+#endif
ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(PHY_OPS_GROUP,
TEMP_REPORTING_THRESHOLDS_CMD),
0, sizeof(cmd), &cmd);
@@ -607,6 +611,7 @@ send:
return ret;
}
+#ifdef CONFIG_THERMAL
static int iwl_mvm_tzone_get_temp(struct thermal_zone_device *device,
int *temperature)
{
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 275d5eaed3b7..842cd81704db 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -333,7 +333,6 @@ static int mt7615_driver_own(struct mt7615_dev *dev)
static int mt7615_load_patch(struct mt7615_dev *dev)
{
- const char *firmware = MT7615_ROM_PATCH;
const struct mt7615_patch_hdr *hdr;
const struct firmware *fw = NULL;
int len, ret, sem;
@@ -349,7 +348,7 @@ static int mt7615_load_patch(struct mt7615_dev *dev)
return -EAGAIN;
}
- ret = request_firmware(&fw, firmware, dev->mt76.dev);
+ ret = request_firmware(&fw, MT7615_ROM_PATCH, dev->mt76.dev);
if (ret)
goto out;
@@ -447,13 +446,11 @@ mt7615_mcu_send_ram_firmware(struct mt7615_dev *dev,
static int mt7615_load_ram(struct mt7615_dev *dev)
{
- const struct firmware *fw;
const struct mt7615_fw_trailer *hdr;
- const char *n9_firmware = MT7615_FIRMWARE_N9;
- const char *cr4_firmware = MT7615_FIRMWARE_CR4;
+ const struct firmware *fw;
int ret;
- ret = request_firmware(&fw, n9_firmware, dev->mt76.dev);
+ ret = request_firmware(&fw, MT7615_FIRMWARE_N9, dev->mt76.dev);
if (ret)
return ret;
@@ -482,7 +479,7 @@ static int mt7615_load_ram(struct mt7615_dev *dev)
release_firmware(fw);
- ret = request_firmware(&fw, cr4_firmware, dev->mt76.dev);
+ ret = request_firmware(&fw, MT7615_FIRMWARE_CR4, dev->mt76.dev);
if (ret)
return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index cef3fd43cb00..7963e302d705 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -26,9 +26,9 @@
#define MT7615_RX_RING_SIZE 1024
#define MT7615_RX_MCU_RING_SIZE 512
-#define MT7615_FIRMWARE_CR4 "mt7615_cr4.bin"
-#define MT7615_FIRMWARE_N9 "mt7615_n9.bin"
-#define MT7615_ROM_PATCH "mt7615_rom_patch.bin"
+#define MT7615_FIRMWARE_CR4 "mediatek/mt7615_cr4.bin"
+#define MT7615_FIRMWARE_N9 "mediatek/mt7615_n9.bin"
+#define MT7615_ROM_PATCH "mediatek/mt7615_rom_patch.bin"
#define MT7615_EEPROM_SIZE 1024
#define MT7615_TOKEN_SIZE 4096
diff --git a/drivers/net/wireless/ralink/rt2x00/Kconfig b/drivers/net/wireless/ralink/rt2x00/Kconfig
index 858f8aa3e616..f8a9244ce012 100644
--- a/drivers/net/wireless/ralink/rt2x00/Kconfig
+++ b/drivers/net/wireless/ralink/rt2x00/Kconfig
@@ -98,17 +98,17 @@ config RT2800PCI_RT53XX
bool "rt2800pci - Include support for rt53xx devices (EXPERIMENTAL)"
default y
---help---
- This adds support for rt53xx wireless chipset family to the
- rt2800pci driver.
- Supported chips: RT5390
+ This adds support for rt53xx wireless chipset family to the
+ rt2800pci driver.
+ Supported chips: RT5390
config RT2800PCI_RT3290
bool "rt2800pci - Include support for rt3290 devices (EXPERIMENTAL)"
default y
---help---
- This adds support for rt3290 wireless chipset family to the
- rt2800pci driver.
- Supported chips: RT3290
+ This adds support for rt3290 wireless chipset family to the
+ rt2800pci driver.
+ Supported chips: RT3290
endif
config RT2500USB
@@ -176,16 +176,16 @@ config RT2800USB_RT3573
config RT2800USB_RT53XX
bool "rt2800usb - Include support for rt53xx devices (EXPERIMENTAL)"
---help---
- This adds support for rt53xx wireless chipset family to the
- rt2800usb driver.
- Supported chips: RT5370
+ This adds support for rt53xx wireless chipset family to the
+ rt2800usb driver.
+ Supported chips: RT5370
config RT2800USB_RT55XX
bool "rt2800usb - Include support for rt55xx devices (EXPERIMENTAL)"
---help---
- This adds support for rt55xx wireless chipset family to the
- rt2800usb driver.
- Supported chips: RT5572
+ This adds support for rt55xx wireless chipset family to the
+ rt2800usb driver.
+ Supported chips: RT5572
config RT2800USB_UNKNOWN
bool "rt2800usb - Include support for unknown (USB) devices"
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index fc14b37d927d..b61b073031e5 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -707,9 +707,6 @@ int rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
rtwdev->h2c.last_box_num = 0;
rtwdev->h2c.seq = 0;
- rtw_fw_send_general_info(rtwdev);
- rtw_fw_send_phydm_info(rtwdev);
-
rtw_flag_set(rtwdev, RTW_FLAG_FW_RUNNING);
return 0;
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index fc8f6213fc8f..6dd457741b15 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -704,6 +704,10 @@ static int rtw_power_on(struct rtw_dev *rtwdev)
goto err_off;
}
+ /* send H2C after HCI has started */
+ rtw_fw_send_general_info(rtwdev);
+ rtw_fw_send_phydm_info(rtwdev);
+
wifi_only = !rtwdev->efuse.btcoex;
rtw_coex_power_on_setting(rtwdev);
rtw_coex_init_hw_config(rtwdev, wifi_only);
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 3fdb52a5789a..d90928be663b 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -90,16 +90,13 @@ static inline void *rtw_pci_get_tx_desc(struct rtw_pci_tx_ring *tx_ring, u8 idx)
return tx_ring->r.head + offset;
}
-static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
- struct rtw_pci_tx_ring *tx_ring)
+static void rtw_pci_free_tx_ring_skbs(struct rtw_dev *rtwdev,
+ struct rtw_pci_tx_ring *tx_ring)
{
struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
struct rtw_pci_tx_data *tx_data;
struct sk_buff *skb, *tmp;
dma_addr_t dma;
- u8 *head = tx_ring->r.head;
- u32 len = tx_ring->r.len;
- int ring_sz = len * tx_ring->r.desc_size;
/* free every skb remained in tx list */
skb_queue_walk_safe(&tx_ring->queue, skb, tmp) {
@@ -110,21 +107,30 @@ static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
pci_unmap_single(pdev, dma, skb->len, PCI_DMA_TODEVICE);
dev_kfree_skb_any(skb);
}
+}
+
+static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
+ struct rtw_pci_tx_ring *tx_ring)
+{
+ struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
+ u8 *head = tx_ring->r.head;
+ u32 len = tx_ring->r.len;
+ int ring_sz = len * tx_ring->r.desc_size;
+
+ rtw_pci_free_tx_ring_skbs(rtwdev, tx_ring);
/* free the ring itself */
pci_free_consistent(pdev, ring_sz, head, tx_ring->r.dma);
tx_ring->r.head = NULL;
}
-static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
- struct rtw_pci_rx_ring *rx_ring)
+static void rtw_pci_free_rx_ring_skbs(struct rtw_dev *rtwdev,
+ struct rtw_pci_rx_ring *rx_ring)
{
struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
struct sk_buff *skb;
- dma_addr_t dma;
- u8 *head = rx_ring->r.head;
int buf_sz = RTK_PCI_RX_BUF_SIZE;
- int ring_sz = rx_ring->r.desc_size * rx_ring->r.len;
+ dma_addr_t dma;
int i;
for (i = 0; i < rx_ring->r.len; i++) {
@@ -137,6 +143,16 @@ static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
dev_kfree_skb(skb);
rx_ring->buf[i] = NULL;
}
+}
+
+static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
+ struct rtw_pci_rx_ring *rx_ring)
+{
+ struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
+ u8 *head = rx_ring->r.head;
+ int ring_sz = rx_ring->r.desc_size * rx_ring->r.len;
+
+ rtw_pci_free_rx_ring_skbs(rtwdev, rx_ring);
pci_free_consistent(pdev, ring_sz, head, rx_ring->r.dma);
}
@@ -484,6 +500,17 @@ static void rtw_pci_dma_reset(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci)
rtwpci->rx_tag = 0;
}
+static void rtw_pci_dma_release(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci)
+{
+ struct rtw_pci_tx_ring *tx_ring;
+ u8 queue;
+
+ for (queue = 0; queue < RTK_MAX_TX_QUEUE_NUM; queue++) {
+ tx_ring = &rtwpci->tx_rings[queue];
+ rtw_pci_free_tx_ring_skbs(rtwdev, tx_ring);
+ }
+}
+
static int rtw_pci_start(struct rtw_dev *rtwdev)
{
struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
@@ -505,6 +532,7 @@ static void rtw_pci_stop(struct rtw_dev *rtwdev)
spin_lock_irqsave(&rtwpci->irq_lock, flags);
rtw_pci_disable_interrupt(rtwdev, rtwpci);
+ rtw_pci_dma_release(rtwdev, rtwpci);
spin_unlock_irqrestore(&rtwpci->irq_lock, flags);
}
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index 4e44ea8c652d..7b5c2fe5bd4d 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -1633,7 +1633,7 @@ static bool check_read_regs(struct zd_usb *usb, struct usb_req_read_regs *req,
*/
if (rr->length < struct_size(regs, regs, count)) {
dev_dbg_f(zd_usb_dev(usb),
- "error: actual length %d less than expected %ld\n",
+ "error: actual length %d less than expected %zu\n",
rr->length, struct_size(regs, regs, count));
return false;
}
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 7eda62a9e0df..9642971e89ce 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -661,7 +661,7 @@ static int st95hf_error_handling(struct st95hf_context *stcontext,
result = -ETIMEDOUT;
else
result = -EIO;
- return result;
+ return result;
}
/* Check for CRC err only if CRC is present in the tag response */
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 000b95787df1..bd6129db6417 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -362,7 +362,7 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev,
int ret;
iface = of_get_phy_mode(np);
- if (iface < 0)
+ if ((int)iface < 0)
return NULL;
if (of_phy_is_fixed_link(np)) {
ret = of_phy_register_fixed_link(np);
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 9c18476d8d10..67d0199840fd 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -155,7 +155,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EINVAL;
break;
} else if (cmd == PTP_EXTTS_REQUEST) {
- req.extts.flags &= ~PTP_EXTTS_VALID_FLAGS;
+ req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
req.extts.rsv[0] = 0;
req.extts.rsv[1] = 0;
}
@@ -184,7 +184,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EINVAL;
break;
} else if (cmd == PTP_PEROUT_REQUEST) {
- req.perout.flags &= ~PTP_PEROUT_VALID_FLAGS;
+ req.perout.flags &= PTP_PEROUT_V1_VALID_FLAGS;
req.perout.rsv[0] = 0;
req.perout.rsv[1] = 0;
req.perout.rsv[2] = 0;
diff --git a/fs/exec.c b/fs/exec.c
index f7f6a140856a..555e93c7dec8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
}
task_lock(tsk);
active_mm = tsk->active_mm;
+ membarrier_exec_mmap(mm);
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
@@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
- membarrier_execve(current);
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f33881688f42..fb07b503dc45 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
- int node);
+ int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
#else
#define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+ alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+ alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bac395f1d00a..5228c62af416 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,8 +139,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
struct mempolicy *get_task_policy(struct task_struct *p);
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
unsigned long addr);
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
- unsigned long addr);
bool vma_policy_mof(struct vm_area_struct *vma);
extern void numa_default_policy(void);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a487b681b516..138c50d5a353 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -282,7 +282,6 @@ enum {
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
- MLX5_CMD_OP_SYNC_STEERING = 0xb00,
MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
@@ -296,6 +295,7 @@ enum {
MLX5_CMD_OP_DESTROY_UCTX = 0xa06,
MLX5_CMD_OP_CREATE_UMEM = 0xa08,
MLX5_CMD_OP_DESTROY_UMEM = 0xa0a,
+ MLX5_CMD_OP_SYNC_STEERING = 0xb00,
MLX5_CMD_OP_MAX
};
@@ -487,7 +487,7 @@ union mlx5_ifc_gre_key_bits {
struct mlx5_ifc_fte_match_set_misc_bits {
u8 gre_c_present[0x1];
- u8 reserved_auto1[0x1];
+ u8 reserved_at_1[0x1];
u8 gre_k_present[0x1];
u8 gre_s_present[0x1];
u8 source_vhca_port[0x4];
@@ -5054,50 +5054,50 @@ struct mlx5_ifc_query_hca_cap_in_bits {
struct mlx5_ifc_other_hca_cap_bits {
u8 roce[0x1];
- u8 reserved_0[0x27f];
+ u8 reserved_at_1[0x27f];
};
struct mlx5_ifc_query_other_hca_cap_out_bits {
u8 status[0x8];
- u8 reserved_0[0x18];
+ u8 reserved_at_8[0x18];
u8 syndrome[0x20];
- u8 reserved_1[0x40];
+ u8 reserved_at_40[0x40];
struct mlx5_ifc_other_hca_cap_bits other_capability;
};
struct mlx5_ifc_query_other_hca_cap_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 reserved_at_10[0x10];
- u8 reserved_1[0x10];
+ u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_2[0x10];
+ u8 reserved_at_40[0x10];
u8 function_id[0x10];
- u8 reserved_3[0x20];
+ u8 reserved_at_60[0x20];
};
struct mlx5_ifc_modify_other_hca_cap_out_bits {
u8 status[0x8];
- u8 reserved_0[0x18];
+ u8 reserved_at_8[0x18];
u8 syndrome[0x20];
- u8 reserved_1[0x40];
+ u8 reserved_at_40[0x40];
};
struct mlx5_ifc_modify_other_hca_cap_in_bits {
u8 opcode[0x10];
- u8 reserved_0[0x10];
+ u8 reserved_at_10[0x10];
- u8 reserved_1[0x10];
+ u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_2[0x10];
+ u8 reserved_at_40[0x10];
u8 function_id[0x10];
u8 field_select[0x20];
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5183e0d77dfa..2222fa795284 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -383,6 +383,16 @@ struct mm_struct {
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
+#ifdef CONFIG_MEMBARRIER
+ /**
+ * @membarrier_state: Flags controlling membarrier behavior.
+ *
+ * This field is close to @pgd to hopefully fit in the same
+ * cache-line, which needs to be touched by switch_mm().
+ */
+ atomic_t membarrier_state;
+#endif
+
/**
* @mm_users: The number of users including userspace.
*
@@ -452,9 +462,7 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access */
struct core_state *core_state; /* coredumping support */
-#ifdef CONFIG_MEMBARRIER
- atomic_t membarrier_state;
-#endif
+
#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table;
diff --git a/include/linux/platform_data/eth-netx.h b/include/linux/platform_data/eth-netx.h
deleted file mode 100644
index a3a6322668d8..000000000000
--- a/include/linux/platform_data/eth-netx.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#ifndef __ETH_NETX_H
-#define __ETH_NETX_H
-
-struct netxeth_platform_data {
- unsigned int xcno; /* number of xmac/xpec engine this eth uses */
-};
-
-#endif
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
/*
* rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
*
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
*/
struct rcuwait {
struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
*/
#define rcuwait_wait_event(w, condition) \
({ \
- /* \
- * Complain if we are called after do_exit()/exit_notify(), \
- * as we cannot rely on the rcu critical region for the \
- * wakeup side. \
- */ \
- WARN_ON(current->exit_state); \
- \
rcu_assign_pointer((w)->task, current); \
for (;;) { \
/* \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70db597d6fd4..2c2e56bd8913 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1130,7 +1130,10 @@ struct task_struct {
struct tlbflush_unmap_batch tlb_ubc;
- struct rcu_head rcu;
+ union {
+ refcount_t rcu_users;
+ struct rcu_head rcu;
+ };
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
@@ -1839,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
* running or not.
*/
#ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu) false
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return false;
+}
#endif
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 4a7944078cc3..e6770012db18 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -362,16 +362,16 @@ enum {
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
+ if (current->mm != mm)
+ return;
if (likely(!(atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
return;
sync_core_before_usermode();
}
-static inline void membarrier_execve(struct task_struct *t)
-{
- atomic_set(&t->mm->membarrier_state, 0);
-}
+extern void membarrier_exec_mmap(struct mm_struct *mm);
+
#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
{
}
#endif
-static inline void membarrier_execve(struct task_struct *t)
+static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 3d90ed8f75f0..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size __read_mostly;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 907209c0794e..e7d3b1a513ef 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4144,8 +4144,17 @@ static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
return NULL;
}
+
+static inline void skb_ext_reset(struct sk_buff *skb)
+{
+ if (unlikely(skb->active_extensions)) {
+ __skb_ext_put(skb->extensions);
+ skb->active_extensions = 0;
+ }
+}
#else
static inline void skb_ext_put(struct sk_buff *skb) {}
+static inline void skb_ext_reset(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index aef38c140014..dfd919b3119e 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -71,6 +71,7 @@ struct inet_timewait_sock {
tw_pad : 2, /* 2 bits hole */
tw_tos : 8;
u32 tw_txhash;
+ u32 tw_priority;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
};
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8dfc65639aa4..009605c56f20 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -981,7 +981,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
* upper-layer output functions
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass);
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2655e03dbe1b..001d294edf57 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -889,6 +889,8 @@ enum nft_chain_flags {
NFT_CHAIN_HW_OFFLOAD = 0x2,
};
+#define NFT_CHAIN_POLICY_UNSET U8_MAX
+
/**
* struct nft_chain - nf_tables chain
*
@@ -1181,6 +1183,10 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
const struct nlattr *nla,
u8 genmask);
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ enum nft_trans_phase phase);
+
void nft_register_flowtable_type(struct nf_flowtable_type *type);
void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
diff --git a/include/net/route.h b/include/net/route.h
index dfce19c9fa96..6c516840380d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -53,10 +53,11 @@ struct rtable {
unsigned int rt_flags;
__u16 rt_type;
__u8 rt_is_input;
- u8 rt_gw_family;
+ __u8 rt_uses_gateway;
int rt_iif;
+ u8 rt_gw_family;
/* Info on neighbour */
union {
__be32 rt_gw4;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 43f5b7ed02bd..637548d54b3e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -494,6 +494,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
return q;
}
+static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
+{
+ return rcu_dereference_bh(qdisc->dev_queue->qdisc);
+}
+
static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->qdisc_sleeping;
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 63ae4a39e58b..c02dec97e1ce 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -22,9 +22,9 @@ struct btf_header {
};
/* Max # of type identifier */
-#define BTF_MAX_TYPE 0x0000ffff
+#define BTF_MAX_TYPE 0x000fffff
/* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET 0x0000ffff
+#define BTF_MAX_NAME_OFFSET 0x00ffffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 3b86c14ea49d..8076c940ffeb 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -123,7 +123,7 @@ struct ebt_entry_match {
union {
struct {
char name[EBT_EXTENSION_MAXNAMELEN];
- uint8_t revision;
+ __u8 revision;
};
struct xt_match *match;
} u;
@@ -136,7 +136,7 @@ struct ebt_entry_watcher {
union {
struct {
char name[EBT_EXTENSION_MAXNAMELEN];
- uint8_t revision;
+ __u8 revision;
};
struct xt_target *watcher;
} u;
@@ -149,7 +149,7 @@ struct ebt_entry_target {
union {
struct {
char name[EBT_EXTENSION_MAXNAMELEN];
- uint8_t revision;
+ __u8 revision;
};
struct xt_target *target;
} u;
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index f16301015949..59e89a1bc3bb 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -31,15 +31,37 @@
#define PTP_ENABLE_FEATURE (1<<0)
#define PTP_RISING_EDGE (1<<1)
#define PTP_FALLING_EDGE (1<<2)
+
+/*
+ * flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
+ */
#define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \
PTP_RISING_EDGE | \
PTP_FALLING_EDGE)
/*
+ * flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
+ * DO NOT ADD NEW FLAGS HERE.
+ */
+#define PTP_EXTTS_V1_VALID_FLAGS (PTP_ENABLE_FEATURE | \
+ PTP_RISING_EDGE | \
+ PTP_FALLING_EDGE)
+
+/*
* Bits of the ptp_perout_request.flags field:
*/
#define PTP_PEROUT_ONE_SHOT (1<<0)
+
+/*
+ * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl.
+ */
#define PTP_PEROUT_VALID_FLAGS (PTP_PEROUT_ONE_SHOT)
+
+/*
+ * No flags are valid for the original PTP_PEROUT_REQUEST ioctl
+ */
+#define PTP_PEROUT_V1_VALID_FLAGS (0)
+
/*
* struct ptp_clock_time - represents a time value
*
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index adb3adcebe3c..29c7c06c6bd6 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2332,7 +2332,7 @@ static int btf_enum_check_kflag_member(struct btf_verifier_env *env,
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
btf_verifier_log_member(env, struct_type, member,
"Member is not byte aligned");
- return -EINVAL;
+ return -EINVAL;
}
nr_bits = int_bitsize;
@@ -2377,9 +2377,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (t->size != sizeof(int)) {
- btf_verifier_log_type(env, t, "Expected size:%zu",
- sizeof(int));
+ if (t->size > 8 || !is_power_of_2(t->size)) {
+ btf_verifier_log_type(env, t, "Unexpected size");
return -EINVAL;
}
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 942c662e2eed..82a1ffe15dfa 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -37,7 +37,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
if (!node)
- return NULL;
+ return ERR_PTR(-ENOMEM);
err = xsk_map_inc(map);
if (err) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 22ab6a4bdc51..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(tsk);
}
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+ if (refcount_dec_and_test(&task->rcu_users))
+ call_rcu(&task->rcu, delayed_put_task_struct);
+}
void release_task(struct task_struct *p)
{
@@ -222,76 +227,13 @@ repeat:
write_unlock_irq(&tasklist_lock);
release_thread(p);
- call_rcu(&p->rcu, delayed_put_task_struct);
+ put_task_struct_rcu_user(p);
p = leader;
if (unlikely(zap_leader))
goto repeat;
}
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
- struct sighand_struct *sighand;
- struct task_struct *task;
-
- /*
- * We need to verify that release_task() was not called and thus
- * delayed_put_task_struct() can't run and drop the last reference
- * before rcu_read_unlock(). We check task->sighand != NULL,
- * but we can read the already freed and reused memory.
- */
-retry:
- task = rcu_dereference(*ptask);
- if (!task)
- return NULL;
-
- probe_kernel_address(&task->sighand, sighand);
-
- /*
- * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
- * was already freed we can not miss the preceding update of this
- * pointer.
- */
- smp_rmb();
- if (unlikely(task != READ_ONCE(*ptask)))
- goto retry;
-
- /*
- * We've re-checked that "task == *ptask", now we have two different
- * cases:
- *
- * 1. This is actually the same task/task_struct. In this case
- * sighand != NULL tells us it is still alive.
- *
- * 2. This is another task which got the same memory for task_struct.
- * We can't know this of course, and we can not trust
- * sighand != NULL.
- *
- * In this case we actually return a random value, but this is
- * correct.
- *
- * If we return NULL - we can pretend that we actually noticed that
- * *ptask was updated when the previous task has exited. Or pretend
- * that probe_slab_address(&sighand) reads NULL.
- *
- * If we return the new task (because sighand is not NULL for any
- * reason) - this is fine too. This (new) task can't go away before
- * another gp pass.
- *
- * And note: We could even eliminate the false positive if re-read
- * task->sighand once again to avoid the falsely NULL. But this case
- * is very unlikely so we don't care.
- */
- if (!sighand)
- return NULL;
-
- return task;
-}
-
void rcuwait_wake_up(struct rcuwait *w)
{
struct task_struct *task;
@@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
*/
smp_mb(); /* (B) */
- /*
- * Avoid using task_rcu_dereference() magic as long as we are careful,
- * see comment in rcuwait_wait_event() regarding ->exit_state.
- */
task = rcu_dereference(w->task);
if (task)
wake_up_process(task);
diff --git a/kernel/fork.c b/kernel/fork.c
index 60763c043aa3..f9572f416126 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -915,10 +915,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->cpus_ptr = &tsk->cpus_mask;
/*
- * One for us, one for whoever does the "release_task()" (usually
- * parent)
+ * One for the user space visible state that goes away when reaped.
+ * One for the scheduler.
*/
- refcount_set(&tsk->usage, 2);
+ refcount_set(&tsk->rcu_users, 2);
+ /* One for the rcu users */
+ refcount_set(&tsk->usage, 1);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f9a1346a5fa9..7880f4f64d0e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1656,7 +1656,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
- if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+ if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
}
@@ -1677,7 +1678,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
@@ -3254,7 +3254,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
/* Task is done with its stack. */
put_task_stack(prev);
- put_task_struct(prev);
+ put_task_struct_rcu_user(prev);
}
tick_nohz_task_switch();
@@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
else
prev->active_mm = NULL;
} else { // to user
+ membarrier_switch_mm(rq, prev->active_mm, next->mm);
/*
* sys_membarrier() requires an smp_mb() between setting
- * rq->curr and returning to userspace.
+ * rq->curr / membarrier_switch_mm() and returning to userspace.
*
* The below provides this either through switch_mm(), or in
* case 'prev->active_mm == next->mm' through
* finish_task_switch()'s mmdrop().
*/
-
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
@@ -4042,7 +4042,11 @@ static void __sched notrace __schedule(bool preempt)
if (likely(prev != next)) {
rq->nr_switches++;
- rq->curr = next;
+ /*
+ * RCU users of rcu_dereference(rq->curr) may not see
+ * changes to task_struct made by pick_next_task().
+ */
+ RCU_INIT_POINTER(rq->curr, next);
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
@@ -4223,9 +4227,8 @@ static void __sched notrace preempt_schedule_common(void)
#ifdef CONFIG_PREEMPTION
/*
- * this is the entry point to schedule() from in-kernel preemption
- * off of preempt_enable. Kernel preemptions off return from interrupt
- * occur there and call schedule directly.
+ * This is the entry point to schedule() from in-kernel preemption
+ * off of preempt_enable.
*/
asmlinkage __visible void __sched notrace preempt_schedule(void)
{
@@ -4296,7 +4299,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
#endif /* CONFIG_PREEMPTION */
/*
- * this is the entry point to schedule() from kernel preemption
+ * This is the entry point to schedule() from kernel preemption
* off of irq context.
* Note, that this is called and return with irqs disabled. This will
* protect us against recursive calling from irq.
@@ -6069,7 +6072,8 @@ void init_idle(struct task_struct *idle, int cpu)
__set_task_cpu(idle, cpu);
rcu_read_unlock();
- rq->curr = rq->idle = idle;
+ rq->idle = idle;
+ rcu_assign_pointer(rq->curr, idle);
idle->on_rq = TASK_ON_RQ_QUEUED;
#ifdef CONFIG_SMP
idle->on_cpu = 1;
@@ -6430,8 +6434,6 @@ int sched_cpu_activate(unsigned int cpu)
}
rq_unlock_irqrestore(rq, &rf);
- update_max_interval();
-
return 0;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d4bbf68c3161..83ab35e2374f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -749,7 +749,6 @@ void init_entity_runnable_average(struct sched_entity *se)
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
static void attach_entity_cfs_rq(struct sched_entity *se);
/*
@@ -1603,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
return;
rcu_read_lock();
- cur = task_rcu_dereference(&dst_rq->curr);
+ cur = rcu_dereference(dst_rq->curr);
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
cur = NULL;
@@ -4354,21 +4353,16 @@ static inline u64 sched_cfs_bandwidth_slice(void)
}
/*
- * Replenish runtime according to assigned quota and update expiration time.
- * We use sched_clock_cpu directly instead of rq->clock to avoid adding
- * additional synchronization around rq->lock.
+ * Replenish runtime according to assigned quota. We use sched_clock_cpu
+ * directly instead of rq->clock to avoid adding additional synchronization
+ * around rq->lock.
*
* requires cfs_b->lock
*/
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
{
- u64 now;
-
- if (cfs_b->quota == RUNTIME_INF)
- return;
-
- now = sched_clock_cpu(smp_processor_id());
- cfs_b->runtime = cfs_b->quota;
+ if (cfs_b->quota != RUNTIME_INF)
+ cfs_b->runtime = cfs_b->quota;
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4376,15 +4370,6 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
return &tg->cfs_bandwidth;
}
-/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
- if (unlikely(cfs_rq->throttle_count))
- return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
-
- return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
-}
-
/* returns 0 on failure to allocate runtime */
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
@@ -4476,7 +4461,6 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
cfs_rq->throttle_count--;
if (!cfs_rq->throttle_count) {
- /* adjust cfs_rq_clock_task() */
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
cfs_rq->throttled_clock_task;
@@ -4994,15 +4978,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
- u64 overrun;
-
lockdep_assert_held(&cfs_b->lock);
if (cfs_b->period_active)
return;
cfs_b->period_active = 1;
- overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+ hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
}
@@ -5080,11 +5062,6 @@ static inline bool cfs_bandwidth_used(void)
return false;
}
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
-{
- return rq_clock_task(rq_of(cfs_rq));
-}
-
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
@@ -6412,7 +6389,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
/* Evaluate the energy impact of using this CPU. */
- if (max_spare_cap_cpu >= 0) {
+ if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..a39bed2c784f 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -30,10 +30,42 @@ static void ipi_mb(void *info)
smp_mb(); /* IPIs should be serializing but paranoid. */
}
+static void ipi_sync_rq_state(void *info)
+{
+ struct mm_struct *mm = (struct mm_struct *) info;
+
+ if (current->mm != mm)
+ return;
+ this_cpu_write(runqueues.membarrier_state,
+ atomic_read(&mm->membarrier_state));
+ /*
+ * Issue a memory barrier after setting
+ * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
+ * guarantee that no memory access following registration is reordered
+ * before registration.
+ */
+ smp_mb();
+}
+
+void membarrier_exec_mmap(struct mm_struct *mm)
+{
+ /*
+ * Issue a memory barrier before clearing membarrier_state to
+ * guarantee that no memory access prior to exec is reordered after
+ * clearing this state.
+ */
+ smp_mb();
+ atomic_set(&mm->membarrier_state, 0);
+ /*
+ * Keep the runqueue membarrier_state in sync with this mm
+ * membarrier_state.
+ */
+ this_cpu_write(runqueues.membarrier_state, 0);
+}
+
static int membarrier_global_expedited(void)
{
int cpu;
- bool fallback = false;
cpumask_var_t tmpmask;
if (num_online_cpus() == 1)
@@ -45,17 +77,11 @@ static int membarrier_global_expedited(void)
*/
smp_mb(); /* system call entry is not a mb. */
- /*
- * Expedited membarrier commands guarantee that they won't
- * block, hence the GFP_NOWAIT allocation flag and fallback
- * implementation.
- */
- if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
- /* Fallback for OOM. */
- fallback = true;
- }
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
cpus_read_lock();
+ rcu_read_lock();
for_each_online_cpu(cpu) {
struct task_struct *p;
@@ -70,23 +96,28 @@ static int membarrier_global_expedited(void)
if (cpu == raw_smp_processor_id())
continue;
- rcu_read_lock();
- p = task_rcu_dereference(&cpu_rq(cpu)->curr);
- if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
- MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
- if (!fallback)
- __cpumask_set_cpu(cpu, tmpmask);
- else
- smp_call_function_single(cpu, ipi_mb, NULL, 1);
- }
- rcu_read_unlock();
- }
- if (!fallback) {
- preempt_disable();
- smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
- preempt_enable();
- free_cpumask_var(tmpmask);
+ if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
+ MEMBARRIER_STATE_GLOBAL_EXPEDITED))
+ continue;
+
+ /*
+ * Skip the CPU if it runs a kernel thread. The scheduler
+ * leaves the prior task mm in place as an optimization when
+ * scheduling a kthread.
+ */
+ p = rcu_dereference(cpu_rq(cpu)->curr);
+ if (p->flags & PF_KTHREAD)
+ continue;
+
+ __cpumask_set_cpu(cpu, tmpmask);
}
+ rcu_read_unlock();
+
+ preempt_disable();
+ smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+ preempt_enable();
+
+ free_cpumask_var(tmpmask);
cpus_read_unlock();
/*
@@ -101,22 +132,22 @@ static int membarrier_global_expedited(void)
static int membarrier_private_expedited(int flags)
{
int cpu;
- bool fallback = false;
cpumask_var_t tmpmask;
+ struct mm_struct *mm = current->mm;
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
return -EINVAL;
- if (!(atomic_read(&current->mm->membarrier_state) &
+ if (!(atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
return -EPERM;
} else {
- if (!(atomic_read(&current->mm->membarrier_state) &
+ if (!(atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
return -EPERM;
}
- if (num_online_cpus() == 1)
+ if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
return 0;
/*
@@ -125,17 +156,11 @@ static int membarrier_private_expedited(int flags)
*/
smp_mb(); /* system call entry is not a mb. */
- /*
- * Expedited membarrier commands guarantee that they won't
- * block, hence the GFP_NOWAIT allocation flag and fallback
- * implementation.
- */
- if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
- /* Fallback for OOM. */
- fallback = true;
- }
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
cpus_read_lock();
+ rcu_read_lock();
for_each_online_cpu(cpu) {
struct task_struct *p;
@@ -150,21 +175,16 @@ static int membarrier_private_expedited(int flags)
if (cpu == raw_smp_processor_id())
continue;
- rcu_read_lock();
- p = task_rcu_dereference(&cpu_rq(cpu)->curr);
- if (p && p->mm == current->mm) {
- if (!fallback)
- __cpumask_set_cpu(cpu, tmpmask);
- else
- smp_call_function_single(cpu, ipi_mb, NULL, 1);
- }
- rcu_read_unlock();
- }
- if (!fallback) {
- preempt_disable();
- smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
- preempt_enable();
- free_cpumask_var(tmpmask);
+ p = rcu_dereference(cpu_rq(cpu)->curr);
+ if (p && p->mm == mm)
+ __cpumask_set_cpu(cpu, tmpmask);
}
+ rcu_read_unlock();
+
+ preempt_disable();
+ smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+ preempt_enable();
+
+ free_cpumask_var(tmpmask);
cpus_read_unlock();
/*
@@ -177,32 +198,78 @@ static int membarrier_private_expedited(int flags)
return 0;
}
+static int sync_runqueues_membarrier_state(struct mm_struct *mm)
+{
+ int membarrier_state = atomic_read(&mm->membarrier_state);
+ cpumask_var_t tmpmask;
+ int cpu;
+
+ if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
+ this_cpu_write(runqueues.membarrier_state, membarrier_state);
+
+ /*
+ * For single mm user, we can simply issue a memory barrier
+ * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
+ * mm and in the current runqueue to guarantee that no memory
+ * access following registration is reordered before
+ * registration.
+ */
+ smp_mb();
+ return 0;
+ }
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ /*
+ * For mm with multiple users, we need to ensure all future
+ * scheduler executions will observe @mm's new membarrier
+ * state.
+ */
+ synchronize_rcu();
+
+ /*
+ * For each cpu runqueue, if the task's mm matches @mm, ensure that all
+ * @mm's membarrier state set bits are also set in the runqueue's
+ * membarrier state. This ensures that a runqueue scheduling
+ * between threads which are users of @mm has its membarrier state
+ * updated.
+ */
+ cpus_read_lock();
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+ struct rq *rq = cpu_rq(cpu);
+ struct task_struct *p;
+
+ p = rcu_dereference(rq->curr);
+ if (p && p->mm == mm)
+ __cpumask_set_cpu(cpu, tmpmask);
+ }
+ rcu_read_unlock();
+
+ preempt_disable();
+ smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
+ preempt_enable();
+
+ free_cpumask_var(tmpmask);
+ cpus_read_unlock();
+
+ return 0;
+}
+
static int membarrier_register_global_expedited(void)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
+ int ret;
if (atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
return 0;
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
- if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
- /*
- * For single mm user, single threaded process, we can
- * simply issue a memory barrier after setting
- * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
- * no memory access following registration is reordered
- * before registration.
- */
- smp_mb();
- } else {
- /*
- * For multi-mm user threads, we need to ensure all
- * future scheduler executions will observe the new
- * thread flag state for this mm.
- */
- synchronize_rcu();
- }
+ ret = sync_runqueues_membarrier_state(mm);
+ if (ret)
+ return ret;
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
&mm->membarrier_state);
@@ -213,12 +280,15 @@ static int membarrier_register_private_expedited(int flags)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
- int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+ int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+ set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
+ ret;
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
return -EINVAL;
- state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+ ready_state =
+ MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
}
/*
@@ -226,20 +296,15 @@ static int membarrier_register_private_expedited(int flags)
* groups, which use the same mm. (CLONE_VM but not
* CLONE_THREAD).
*/
- if (atomic_read(&mm->membarrier_state) & state)
+ if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
return 0;
- atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
if (flags & MEMBARRIER_FLAG_SYNC_CORE)
- atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
- &mm->membarrier_state);
- if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
- /*
- * Ensure all future scheduler executions will observe the
- * new thread flag state for this process.
- */
- synchronize_rcu();
- }
- atomic_or(state, &mm->membarrier_state);
+ set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
+ atomic_or(set_state, &mm->membarrier_state);
+ ret = sync_runqueues_membarrier_state(mm);
+ if (ret)
+ return ret;
+ atomic_or(ready_state, &mm->membarrier_state);
return 0;
}
@@ -253,8 +318,10 @@ static int membarrier_register_private_expedited(int flags)
* command specified does not exist, not available on the running
* kernel, or if the command argument is invalid, this system call
* returns -EINVAL. For a given command, with flags argument set to 0,
- * this system call is guaranteed to always return the same value until
- * reboot.
+ * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
+ * always return the same value until reboot. In addition, it can return
+ * -ENOMEM if there is not enough memory available to perform the system
+ * call.
*
* All memory accesses performed in program order from each targeted thread
* is guaranteed to be ordered with respect to sys_membarrier(). If we use
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3cb895d14a2..0db2c1b3361e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -911,6 +911,10 @@ struct rq {
atomic_t nr_iowait;
+#ifdef CONFIG_MEMBARRIER
+ int membarrier_state;
+#endif
+
#ifdef CONFIG_SMP
struct root_domain *rd;
struct sched_domain __rcu *sd;
@@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
static inline bool sched_energy_enabled(void) { return false; }
#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
+#ifdef CONFIG_MEMBARRIER
+/*
+ * The scheduler provides memory barriers required by membarrier between:
+ * - prior user-space memory accesses and store to rq->membarrier_state,
+ * - store to rq->membarrier_state and following user-space memory accesses.
+ * In the same way it provides those guarantees around store to rq->curr.
+ */
+static inline void membarrier_switch_mm(struct rq *rq,
+ struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+ int membarrier_state;
+
+ if (prev_mm == next_mm)
+ return;
+
+ membarrier_state = atomic_read(&next_mm->membarrier_state);
+ if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+ return;
+
+ WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+#else
+static inline void membarrier_switch_mm(struct rq *rq,
+ struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+}
+#endif
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 492a8bfaae98..44bd08f2443b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -505,14 +505,17 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
+static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
+struct bpf_nested_pt_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
- struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+ int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
@@ -527,12 +530,25 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
.data = meta,
},
};
+ struct perf_sample_data *sd;
+ struct pt_regs *regs;
+ u64 ret;
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+ ret = -EBUSY;
+ goto out;
+ }
+ sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
+ regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
- return __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, sd);
+out:
+ this_cpu_dec(bpf_event_output_nest_level);
+ return ret;
}
BPF_CALL_0(bpf_get_current_task)
diff --git a/lib/Kconfig b/lib/Kconfig
index 4e6b1c3e4c98..183f92a297ca 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -555,11 +555,10 @@ config SIGNATURE
Implementation is done using GnuPG MPI library
config DIMLIB
- bool "DIM library"
- default y
+ bool
help
Dynamic Interrupt Moderation library.
- Implements an algorithm for dynamically change CQ modertion values
+ Implements an algorithm for dynamically changing CQ moderation values
according to run time performance.
#
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 73fc517c08d2..c5cb6dcd6c69 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -659,40 +659,30 @@ release:
* available
* never: never stall for any thp allocation
*/
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
{
const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
- gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
- struct mempolicy *pol;
- /*
- * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
- * specified, to express a general desire to stay on the current
- * node for optimistic allocation attempts. If the defrag mode
- * and/or madvise hint requires the direct reclaim then we prefer
- * to fallback to other node rather than node reclaim because that
- * can lead to excessive reclaim even though there is free memory
- * on other nodes. We expect that NUMA preferences are specified
- * by memory policies.
- */
- pol = get_vma_policy(vma, addr);
- if (pol->mode != MPOL_BIND)
- this_node = __GFP_THISNODE;
- mpol_cond_put(pol);
-#endif
+ /* Always do synchronous compaction */
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+
+ /* Kick kcompactd and fail quickly */
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+ return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+
+ /* Synchronous compaction if madvised, otherwise kick kcompactd */
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- __GFP_KSWAPD_RECLAIM | this_node);
+ return GFP_TRANSHUGE_LIGHT |
+ (vma_madvised ? __GFP_DIRECT_RECLAIM :
+ __GFP_KSWAPD_RECLAIM);
+
+ /* Only do synchronous compaction if madvised */
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- this_node);
- return GFP_TRANSHUGE_LIGHT | this_node;
+ return GFP_TRANSHUGE_LIGHT |
+ (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+ return GFP_TRANSHUGE_LIGHT;
}
/* Caller must hold page table lock. */
@@ -764,8 +754,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
pte_free(vma->vm_mm, pgtable);
return ret;
}
- gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
- page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+ gfp = alloc_hugepage_direct_gfpmask(vma);
+ page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1372,9 +1362,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
alloc:
if (__transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) {
- huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
- new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
- haddr, numa_node_id());
+ huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+ new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
} else
new_page = NULL;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index de27d08b1ff8..4ae967bcf954 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1179,8 +1179,8 @@ static struct page *new_page(struct page *page, unsigned long start)
} else if (PageTransHuge(page)) {
struct page *thp;
- thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
- address, numa_node_id());
+ thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+ HPAGE_PMD_ORDER);
if (!thp)
return NULL;
prep_transhuge_page(thp);
@@ -1732,7 +1732,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
* freeing by another task. It is the caller's responsibility to free the
* extra reference for shared policies.
*/
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
unsigned long addr)
{
struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2081,6 +2081,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual Address of the allocation. Must be inside the VMA.
* @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
*
* This function allocates a page from the kernel page pool and applies
* a NUMA policy associated with the VMA or the current process.
@@ -2091,7 +2092,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*/
struct page *
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, int node)
+ unsigned long addr, int node, bool hugepage)
{
struct mempolicy *pol;
struct page *page;
@@ -2109,6 +2110,42 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
goto out;
}
+ if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+ int hpage_node = node;
+
+ /*
+ * For hugepage allocation and non-interleave policy which
+ * allows the current node (or other explicitly preferred
+ * node) we only try to allocate from the current/preferred
+ * node and don't fall back to other nodes, as the cost of
+ * remote accesses would likely offset THP benefits.
+ *
+ * If the policy is interleave, or does not allow the current
+ * node in its nodemask, we allocate the standard way.
+ */
+ if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+ hpage_node = pol->v.preferred_node;
+
+ nmask = policy_nodemask(gfp, pol);
+ if (!nmask || node_isset(hpage_node, *nmask)) {
+ mpol_cond_put(pol);
+ page = __alloc_pages_node(hpage_node,
+ gfp | __GFP_THISNODE, order);
+
+ /*
+ * If hugepage allocations are configured to always
+ * synchronous compact or the vma has been madvised
+ * to prefer hugepage backing, retry allowing remote
+ * memory as well.
+ */
+ if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+ page = __alloc_pages_node(hpage_node,
+ gfp | __GFP_NORETRY, order);
+
+ goto out;
+ }
+ }
+
nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node);
page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3334a769eb91..15c2050c629b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4467,6 +4467,28 @@ retry_cpuset:
if (page)
goto got_pg;
+ if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+ /*
+ * If allocating entire pageblock(s) and compaction
+ * failed because all zones are below low watermarks
+ * or is prohibited because it recently failed at this
+ * order, fail immediately.
+ *
+ * Reclaim is
+ * - potentially very expensive because zones are far
+ * below their low watermarks or this is part of very
+ * bursty high order allocations,
+ * - not guaranteed to help because isolate_freepages()
+ * may not iterate over freed pages as part of its
+ * linear scan, and
+ * - unlikely to make entire pageblocks free on its
+ * own.
+ */
+ if (compact_result == COMPACT_SKIPPED ||
+ compact_result == COMPACT_DEFERRED)
+ goto nopage;
+ }
+
/*
* Checks for costly allocations with __GFP_NORETRY, which
* includes THP page fault allocations
diff --git a/mm/shmem.c b/mm/shmem.c
index 30ce722c23fa..cd570cc79c76 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1481,7 +1481,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
- HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+ HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4072e9d394d6..b41375d4d295 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1023,6 +1023,11 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
*/
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
goto out;
+
+ rc = -EPERM;
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ goto out;
+
rc = -ENOMEM;
sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
if (!sk)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ca5207767dc2..bb222b882b67 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -855,6 +855,8 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
break;
case SOCK_RAW:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
break;
default:
return -ESOCKTNOSUPPORT;
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index a3d188dfbe75..d5028af750d5 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -12,11 +12,11 @@ config BATMAN_ADV
depends on NET
select LIBCRC32C
help
- B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
- a routing protocol for multi-hop ad-hoc mesh networks. The
- networks may be wired or wireless. See
- https://www.open-mesh.org/ for more information and user space
- tools.
+ B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
+ a routing protocol for multi-hop ad-hoc mesh networks. The
+ networks may be wired or wireless. See
+ https://www.open-mesh.org/ for more information and user space
+ tools.
config BATMAN_ADV_BATMAN_V
bool "B.A.T.M.A.N. V protocol"
diff --git a/net/core/dev.c b/net/core/dev.c
index 71b18e80389f..bf3ed413abaf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5666,7 +5666,7 @@ EXPORT_SYMBOL(gro_find_complete_by_type);
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
- secpath_reset(skb);
+ skb_ext_put(skb);
kmem_cache_free(skbuff_head_cache, skb);
}
@@ -5733,7 +5733,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- secpath_reset(skb);
+ skb_ext_reset(skb);
napi->skb = skb;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 1325316d9eab..193af526e908 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -172,7 +172,7 @@ void dst_release(struct dst_entry *dst)
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
- if (unlikely(newrefcnt < 0))
+ if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
@@ -187,7 +187,7 @@ void dst_release_immediate(struct dst_entry *dst)
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
- if (unlikely(newrefcnt < 0))
+ if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f12e8a050edb..01d65206f4fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5119,7 +5119,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->skb_iif = 0;
skb->ignore_df = 0;
skb_dst_drop(skb);
- secpath_reset(skb);
+ skb_ext_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b7381ff787b..25aab672fc99 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -230,7 +230,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
+ err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
+ sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -284,7 +285,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
- ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
+ ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index badc5cfe4dc6..d93d4531aa9b 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1008,6 +1008,9 @@ static int ieee802154_create(struct net *net, struct socket *sock,
switch (sock->type) {
case SOCK_RAW:
+ rc = -EPERM;
+ if (!capable(CAP_NET_RAW))
+ goto out;
proto = &ieee802154_raw_prot;
ops = &ieee802154_raw_ops;
break;
diff --git a/net/ife/Kconfig b/net/ife/Kconfig
index 6cd1f6d18f30..bcf650564db4 100644
--- a/net/ife/Kconfig
+++ b/net/ife/Kconfig
@@ -5,7 +5,7 @@
menuconfig NET_IFE
depends on NET
- tristate "Inter-FE based on IETF ForCES InterFE LFB"
+ tristate "Inter-FE based on IETF ForCES InterFE LFB"
default n
help
Say Y here to add support of IFE encapsulation protocol
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 974de4d20f25..03381f3e12ba 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -492,8 +492,8 @@ config TCP_CONG_WESTWOOD
wired networks and throughput over wireless links.
config TCP_CONG_HTCP
- tristate "H-TCP"
- default m
+ tristate "H-TCP"
+ default m
---help---
H-TCP is a send-side only modifications of the TCP Reno
protocol stack that optimizes the performance of TCP
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f5c163d4771b..a9183543ca30 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -560,7 +560,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
rcu_read_unlock();
return &rt->dst;
@@ -598,7 +598,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
return &rt->dst;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 06f6f280b9ff..00ec819f949b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && rt->rt_gw_family)
+ if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
IPCB(skb)->flags |= IPSKB_FORWARDED;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5eb73775c3f7..28fca408812c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -499,7 +499,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
@@ -1694,7 +1694,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
inet_sk(sk)->tos = arg->tos;
- sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
sk->sk_sndbuf = sysctl_wmem_default;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b6a6f18c3dd1..7dcce724c78b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -635,6 +635,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_uses_gateway = 1;
rt->rt_gw_family = AF_INET;
rt->rt_gw4 = fnhe->fnhe_gw;
}
@@ -1313,7 +1314,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_gw_family && mtu > 576)
+ if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1569,6 +1570,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+ rt->rt_uses_gateway = 1;
rt->rt_gw_family = nhc->nhc_gw_family;
/* only INET and INET6 are supported */
if (likely(nhc->nhc_gw_family == AF_INET))
@@ -1634,6 +1636,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_iif = 0;
rt->rt_pmtu = 0;
rt->rt_mtu_locked = 0;
+ rt->rt_uses_gateway = 0;
rt->rt_gw_family = 0;
rt->rt_gw4 = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2694,6 +2697,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
+ rt->rt_uses_gateway = ort->rt_uses_gateway;
rt->rt_gw_family = ort->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
rt->rt_gw4 = ort->rt_gw4;
@@ -2778,21 +2782,23 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_gw_family == AF_INET &&
- nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
- goto nla_put_failure;
- } else if (rt->rt_gw_family == AF_INET6) {
- int alen = sizeof(struct in6_addr);
- struct nlattr *nla;
- struct rtvia *via;
-
- nla = nla_reserve(skb, RTA_VIA, alen + 2);
- if (!nla)
+ if (rt->rt_uses_gateway) {
+ if (rt->rt_gw_family == AF_INET &&
+ nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
goto nla_put_failure;
-
- via = nla_data(nla);
- via->rtvia_family = AF_INET6;
- memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ int alen = sizeof(struct in6_addr);
+ struct nlattr *nla;
+ struct rtvia *via;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
+ goto nla_put_failure;
+
+ via = nla_data(nla);
+ via->rtvia_family = AF_INET6;
+ memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+ }
}
expires = rt->dst.expires;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 95b59540eee1..32772d6ded4e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -388,7 +388,7 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
* which allows 2 outstanding 2-packet sequences, to try to keep pipe
* full even with ACK-every-other-packet delayed ACKs.
*/
-static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain)
+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
{
struct bbr *bbr = inet_csk_ca(sk);
@@ -399,7 +399,7 @@ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain)
cwnd = (cwnd + 1) & ~1U;
/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
- if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
cwnd += 2;
return cwnd;
@@ -411,7 +411,7 @@ static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
u32 inflight;
inflight = bbr_bdp(sk, bw, gain);
- inflight = bbr_quantization_budget(sk, inflight, gain);
+ inflight = bbr_quantization_budget(sk, inflight);
return inflight;
}
@@ -531,7 +531,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
* due to aggregation (of data and/or ACKs) visible in the ACK stream.
*/
target_cwnd += bbr_ack_aggregation_cwnd(sk);
- target_cwnd = bbr_quantization_budget(sk, target_cwnd, gain);
+ target_cwnd = bbr_quantization_budget(sk, target_cwnd);
/* If we're below target cwnd, slow start cwnd toward target cwnd. */
if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd394ad179a0..2ee45e3755e9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -771,6 +771,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
+ ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
}
ip_send_unicast_reply(ctl_sk,
@@ -866,6 +868,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
+ ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8bcaf2586b68..bb140a5db8c0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -266,6 +266,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_transparent = inet->transparent;
tw->tw_mark = sk->sk_mark;
+ tw->tw_priority = sk->sk_priority;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index dbd9d2d0ee63..40de2d2364a1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -210,7 +210,7 @@ static int tcp_write_timeout(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
- bool expired, do_reset;
+ bool expired = false, do_reset;
int retry_until;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
@@ -242,9 +242,10 @@ static int tcp_write_timeout(struct sock *sk)
if (tcp_out_of_resources(sk, do_reset))
return 1;
}
+ }
+ if (!expired)
expired = retransmits_timed_out(sk, retry_until,
icsk->icsk_user_timeout);
- }
tcp_fastopen_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cdef8f9a3b01..35b84b52b702 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -85,6 +85,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
+ xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_gw_family = rt->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
xdst->u.rt.rt_gw4 = rt->rt_gw4;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d22b6c140f23..f9e8fe3ff0c5 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -287,7 +287,8 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
return false;
suppress_route:
- ip6_rt_put(rt);
+ if (!(arg->flags & FIB_LOOKUP_NOREF))
+ ip6_rt_put(rt);
return true;
}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 4da24aa6c696..0a0945a5b30d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
- np->tclass);
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 87f47bc55c5e..6e2af411cd9c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -318,7 +318,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
if (rt->dst.error == -EAGAIN) {
ip6_rt_put_flags(rt, flags);
rt = net->ipv6.ip6_null_entry;
- if (!(flags | RT6_LOOKUP_F_DST_NOREF))
+ if (!(flags & RT6_LOOKUP_F_DST_NOREF))
dst_hold(&rt->dst);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89a4c7c2e25d..edadee4a7e76 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = priority;
skb->mark = mark;
mtu = dst_mtu(dst);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6120a7800975..69443e9a3aa5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -170,13 +170,13 @@ config IP6_NF_MATCH_RT
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_MATCH_SRH
- tristate '"srh" Segment Routing header match support'
- depends on NETFILTER_ADVANCED
- help
- srh matching allows you to match packets based on the segment
+ tristate '"srh" Segment Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ srh matching allows you to match packets based on the segment
routing header of the packet.
- To compile it as a module, choose M here. If unsure, say N.
+ To compile it as a module, choose M here. If unsure, say N.
# The targets
config IP6_NF_TARGET_HL
@@ -249,10 +249,10 @@ config IP6_NF_SECURITY
depends on SECURITY
depends on NETFILTER_ADVANCED
help
- This option adds a `security' table to iptables, for use
- with Mandatory Access Control (MAC) policy.
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
- If unsure, say N.
+ If unsure, say N.
config IP6_NF_NAT
tristate "ip6tables NAT support"
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 87f44d3250ee..e3d9f4559c99 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -512,7 +512,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
+ sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -803,7 +804,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label)
+ u8 tclass, __be32 label, u32 priority)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -907,7 +908,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+ priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -930,6 +932,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
struct sock *sk1 = NULL;
#endif
__be32 label = 0;
+ u32 priority = 0;
struct net *net;
int oif = 0;
@@ -990,16 +993,19 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb);
if (np->repflow)
label = ip6_flowlabel(ipv6h);
+ priority = sk->sk_priority;
}
- if (sk->sk_state == TCP_TIME_WAIT)
+ if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
+ priority = inet_twsk(sk)->tw_priority;
+ }
} else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h);
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
- label);
+ label, priority);
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -1010,10 +1016,10 @@ out:
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
- __be32 label)
+ __be32 label, u32 priority)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label);
+ tclass, label, priority);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1025,7 +1031,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
inet_twsk_put(tw);
}
@@ -1048,7 +1054,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
- 0, 0);
+ 0, 0, sk->sk_priority);
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 8f12f5c6ab87..ea9e73428ed9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -378,8 +378,12 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
{
struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
struct bpf_prog *prog = psock->bpf_prog;
+ int res;
- return BPF_PROG_RUN(prog, skb);
+ preempt_disable();
+ res = BPF_PROG_RUN(prog, skb);
+ preempt_enable();
+ return res;
}
static int kcm_read_sock_done(struct strparser *strp, int err)
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 0b3f0673e1a2..ad3fd7f1da75 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -264,9 +264,7 @@ enum {
ncsi_dev_state_config_ev,
ncsi_dev_state_config_sma,
ncsi_dev_state_config_ebf,
-#if IS_ENABLED(CONFIG_IPV6)
- ncsi_dev_state_config_egmf,
-#endif
+ ncsi_dev_state_config_dgmf,
ncsi_dev_state_config_ecnt,
ncsi_dev_state_config_ec,
ncsi_dev_state_config_ae,
@@ -295,9 +293,6 @@ struct ncsi_dev_priv {
#define NCSI_DEV_RESET 8 /* Reset state of NC */
unsigned int gma_flag; /* OEM GMA flag */
spinlock_t lock; /* Protect the NCSI device */
-#if IS_ENABLED(CONFIG_IPV6)
- unsigned int inet6_addr_num; /* Number of IPv6 addresses */
-#endif
unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 755aab66dcab..70fe02697544 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -14,7 +14,6 @@
#include <net/sock.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
-#include <net/if_inet6.h>
#include <net/genetlink.h>
#include "internal.h"
@@ -978,9 +977,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
case ncsi_dev_state_config_ev:
case ncsi_dev_state_config_sma:
case ncsi_dev_state_config_ebf:
-#if IS_ENABLED(CONFIG_IPV6)
- case ncsi_dev_state_config_egmf:
-#endif
+ case ncsi_dev_state_config_dgmf:
case ncsi_dev_state_config_ecnt:
case ncsi_dev_state_config_ec:
case ncsi_dev_state_config_ae:
@@ -1033,23 +1030,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
} else if (nd->state == ncsi_dev_state_config_ebf) {
nca.type = NCSI_PKT_CMD_EBF;
nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
- if (ncsi_channel_is_tx(ndp, nc))
+ /* if multicast global filtering is supported then
+ * disable it so that all multicast packet will be
+ * forwarded to management controller
+ */
+ if (nc->caps[NCSI_CAP_GENERIC].cap &
+ NCSI_CAP_GENERIC_MC)
+ nd->state = ncsi_dev_state_config_dgmf;
+ else if (ncsi_channel_is_tx(ndp, nc))
nd->state = ncsi_dev_state_config_ecnt;
else
nd->state = ncsi_dev_state_config_ec;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ndp->inet6_addr_num > 0 &&
- (nc->caps[NCSI_CAP_GENERIC].cap &
- NCSI_CAP_GENERIC_MC))
- nd->state = ncsi_dev_state_config_egmf;
- } else if (nd->state == ncsi_dev_state_config_egmf) {
- nca.type = NCSI_PKT_CMD_EGMF;
- nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
+ } else if (nd->state == ncsi_dev_state_config_dgmf) {
+ nca.type = NCSI_PKT_CMD_DGMF;
if (ncsi_channel_is_tx(ndp, nc))
nd->state = ncsi_dev_state_config_ecnt;
else
nd->state = ncsi_dev_state_config_ec;
-#endif /* CONFIG_IPV6 */
} else if (nd->state == ncsi_dev_state_config_ecnt) {
if (np->preferred_channel &&
nc != np->preferred_channel)
@@ -1483,70 +1480,6 @@ out:
return -ENODEV;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static int ncsi_inet6addr_event(struct notifier_block *this,
- unsigned long event, void *data)
-{
- struct inet6_ifaddr *ifa = data;
- struct net_device *dev = ifa->idev->dev;
- struct ncsi_dev *nd = ncsi_find_dev(dev);
- struct ncsi_dev_priv *ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
- struct ncsi_package *np;
- struct ncsi_channel *nc;
- struct ncsi_cmd_arg nca;
- bool action;
- int ret;
-
- if (!ndp || (ipv6_addr_type(&ifa->addr) &
- (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)))
- return NOTIFY_OK;
-
- switch (event) {
- case NETDEV_UP:
- action = (++ndp->inet6_addr_num) == 1;
- nca.type = NCSI_PKT_CMD_EGMF;
- break;
- case NETDEV_DOWN:
- action = (--ndp->inet6_addr_num == 0);
- nca.type = NCSI_PKT_CMD_DGMF;
- break;
- default:
- return NOTIFY_OK;
- }
-
- /* We might not have active channel or packages. The IPv6
- * required multicast will be enabled when active channel
- * or packages are chosen.
- */
- np = ndp->active_package;
- nc = ndp->active_channel;
- if (!action || !np || !nc)
- return NOTIFY_OK;
-
- /* We needn't enable or disable it if the function isn't supported */
- if (!(nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC))
- return NOTIFY_OK;
-
- nca.ndp = ndp;
- nca.req_flags = 0;
- nca.package = np->id;
- nca.channel = nc->id;
- nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
- ret = ncsi_xmit_cmd(&nca);
- if (ret) {
- netdev_warn(dev, "Fail to %s global multicast filter (%d)\n",
- (event == NETDEV_UP) ? "enable" : "disable", ret);
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block ncsi_inet6addr_notifier = {
- .notifier_call = ncsi_inet6addr_event,
-};
-#endif /* CONFIG_IPV6 */
-
static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -1725,11 +1658,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
}
spin_lock_irqsave(&ncsi_dev_lock, flags);
-#if IS_ENABLED(CONFIG_IPV6)
- ndp->inet6_addr_num = 0;
- if (list_empty(&ncsi_dev_list))
- register_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
@@ -1896,10 +1824,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
spin_lock_irqsave(&ncsi_dev_lock, flags);
list_del_rcu(&ndp->node);
-#if IS_ENABLED(CONFIG_IPV6)
- if (list_empty(&ncsi_dev_list))
- unregister_inet6addr_notifier(&ncsi_inet6addr_notifier);
-#endif
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
ncsi_unregister_netlink(nd->dev);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 34ec7afec116..91efae88e8c2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -697,7 +697,7 @@ config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE
help
- This option adds the flow table mixed IPv4/IPv6 support.
+ This option adds the flow table mixed IPv4/IPv6 support.
To compile it as a module, choose M here.
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index f6f1a0d5c47d..5b672e05d758 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -135,7 +135,7 @@ config IP_VS_WRR
module, choose M here. If unsure, say N.
config IP_VS_LC
- tristate "least-connection scheduling"
+ tristate "least-connection scheduling"
---help---
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
@@ -145,7 +145,7 @@ config IP_VS_LC
module, choose M here. If unsure, say N.
config IP_VS_WLC
- tristate "weighted least-connection scheduling"
+ tristate "weighted least-connection scheduling"
---help---
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
@@ -333,7 +333,7 @@ config IP_VS_NFCT
config IP_VS_PE_SIP
tristate "SIP persistence engine"
- depends on IP_VS_PROTO_UDP
+ depends on IP_VS_PROTO_UDP
depends on NF_CONNTRACK_SIP
---help---
Allow persistence based on the SIP Call-ID
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e4a68dc42694..d481f9baca2f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1715,7 +1715,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err2;
}
- nft_trans_chain_policy(trans) = -1;
+ nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
if (nft_is_base_chain(chain))
nft_trans_chain_policy(trans) = policy;
@@ -3562,8 +3562,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
NFT_SET_OBJECT))
return -EINVAL;
/* Only one of these operations is supported */
- if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
- (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
+ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
+ (NFT_SET_MAP | NFT_SET_OBJECT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+ (NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
}
@@ -5595,6 +5598,22 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE:
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ flowtable->use--;
+ /* fall through */
+ default:
+ return;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
static struct nft_flowtable *
nft_flowtable_lookup_byhandle(const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 21bb772cb4b7..e546f759b7a7 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -313,7 +313,7 @@ static int nft_flow_offload_chain(struct nft_chain *chain,
policy = ppolicy ? *ppolicy : basechain->policy;
/* Only default policy to accept is supported for now. */
- if (cmd == FLOW_BLOCK_BIND && policy != -1 && policy != NF_ACCEPT)
+ if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
return -EOPNOTSUPP;
if (dev->netdev_ops->ndo_setup_tc)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 22cf236eb5d5..f29bbc74c4bf 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -177,6 +177,23 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
return nf_ct_netns_get(ctx->net, ctx->family);
}
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ priv->flowtable->use++;
+}
+
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
@@ -205,6 +222,8 @@ static const struct nft_expr_ops nft_flow_offload_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
.eval = nft_flow_offload_eval,
.init = nft_flow_offload_init,
+ .activate = nft_flow_offload_activate,
+ .deactivate = nft_flow_offload_deactivate,
.destroy = nft_flow_offload_destroy,
.validate = nft_flow_offload_validate,
.dump = nft_flow_offload_dump,
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index c0560bf3c31b..660bad688e2b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -73,9 +73,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_EVAL)
- return -EOPNOTSUPP;
-
priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
err = nft_validate_register_load(priv->sreg, set->klen);
if (err < 0)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9b8742947aff..8dfea26536c9 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -1004,10 +1004,13 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- if (sock->type == SOCK_RAW)
+ if (sock->type == SOCK_RAW) {
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
sock->ops = &llcp_rawsock_ops;
- else
+ } else {
sock->ops = &llcp_sock_ops;
+ }
sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
if (sk == NULL)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index dde9d762edee..f30e406fbec5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2294,7 +2294,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 6c8b0f6d28f9..88f98f27ad88 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -150,6 +150,7 @@ static void __qrtr_node_release(struct kref *kref)
list_del(&node->item);
mutex_unlock(&qrtr_node_lock);
+ cancel_work_sync(&node->work);
skb_queue_purge(&node->rx_queue);
kfree(node);
}
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 38ea7f0f2699..c64e154bc18f 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -23,6 +23,6 @@ config RDS_TCP
This transport does not support RDMA operations.
config RDS_DEBUG
- bool "RDS debugging messages"
+ bool "RDS debugging messages"
depends on RDS
- default n
+ default n
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 20c156a73e73..5b5fb4ca8d3e 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -244,7 +244,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
if (rs->rs_transport) {
trans = rs->rs_transport;
- if (trans->laddr_check(sock_net(sock->sk),
+ if (!trans->laddr_check ||
+ trans->laddr_check(sock_net(sock->sk),
binding_addr, scope_id) != 0) {
ret = -ENOPROTOOPT;
goto out;
@@ -263,6 +264,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sock_set_flag(sk, SOCK_RCU_FREE);
ret = rds_add_bound(rs, binding_addr, &port, scope_id);
+ if (ret)
+ rs->rs_transport = NULL;
out:
release_sock(sk);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b3faafeafab9..2985509147a2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -324,7 +324,7 @@ config NET_SCH_CAKE
tristate "Common Applications Kept Enhanced (CAKE)"
help
Say Y here if you want to use the Common Applications Kept Enhanced
- (CAKE) queue management algorithm.
+ (CAKE) queue management algorithm.
To compile this driver as a module, choose M here: the module
will be called sch_cake.
@@ -730,8 +730,8 @@ config NET_CLS_ACT
config NET_ACT_POLICE
tristate "Traffic Policing"
- depends on NET_CLS_ACT
- ---help---
+ depends on NET_CLS_ACT
+ ---help---
Say Y here if you want to do traffic policing, i.e. strict
bandwidth limiting. This action replaces the existing policing
module.
@@ -740,9 +740,9 @@ config NET_ACT_POLICE
module will be called act_police.
config NET_ACT_GACT
- tristate "Generic actions"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Generic actions"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to take generic actions such as dropping and
accepting packets.
@@ -750,15 +750,15 @@ config NET_ACT_GACT
module will be called act_gact.
config GACT_PROB
- bool "Probability support"
- depends on NET_ACT_GACT
- ---help---
+ bool "Probability support"
+ depends on NET_ACT_GACT
+ ---help---
Say Y here to use the generic action randomly or deterministically.
config NET_ACT_MIRRED
- tristate "Redirecting and Mirroring"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Redirecting and Mirroring"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to allow packets to be mirrored or redirected to
other devices.
@@ -766,10 +766,10 @@ config NET_ACT_MIRRED
module will be called act_mirred.
config NET_ACT_SAMPLE
- tristate "Traffic Sampling"
- depends on NET_CLS_ACT
- select PSAMPLE
- ---help---
+ tristate "Traffic Sampling"
+ depends on NET_CLS_ACT
+ select PSAMPLE
+ ---help---
Say Y here to allow packet sampling tc action. The packet sample
action consists of statistically choosing packets and sampling
them using the psample module.
@@ -778,9 +778,9 @@ config NET_ACT_SAMPLE
module will be called act_sample.
config NET_ACT_IPT
- tristate "IPtables targets"
- depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- ---help---
+ tristate "IPtables targets"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ ---help---
Say Y here to be able to invoke iptables targets after successful
classification.
@@ -788,9 +788,9 @@ config NET_ACT_IPT
module will be called act_ipt.
config NET_ACT_NAT
- tristate "Stateless NAT"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Stateless NAT"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to do stateless NAT on IPv4 packets. You should use
netfilter for NAT unless you know what you are doing.
@@ -798,18 +798,18 @@ config NET_ACT_NAT
module will be called act_nat.
config NET_ACT_PEDIT
- tristate "Packet Editing"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Packet Editing"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here if you want to mangle the content of packets.
To compile this code as a module, choose M here: the
module will be called act_pedit.
config NET_ACT_SIMP
- tristate "Simple Example (Debug)"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Simple Example (Debug)"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to add a simple action for demonstration purposes.
It is meant as an example and for debugging purposes. It will
print a configured policy string followed by the packet count
@@ -821,9 +821,9 @@ config NET_ACT_SIMP
module will be called act_simple.
config NET_ACT_SKBEDIT
- tristate "SKB Editing"
- depends on NET_CLS_ACT
- ---help---
+ tristate "SKB Editing"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to change skb priority or queue_mapping settings.
If unsure, say N.
@@ -832,10 +832,10 @@ config NET_ACT_SKBEDIT
module will be called act_skbedit.
config NET_ACT_CSUM
- tristate "Checksum Updating"
- depends on NET_CLS_ACT && INET
- select LIBCRC32C
- ---help---
+ tristate "Checksum Updating"
+ depends on NET_CLS_ACT && INET
+ select LIBCRC32C
+ ---help---
Say Y here to update some common checksum after some direct
packet alterations.
@@ -854,9 +854,9 @@ config NET_ACT_MPLS
module will be called act_mpls.
config NET_ACT_VLAN
- tristate "Vlan manipulation"
- depends on NET_CLS_ACT
- ---help---
+ tristate "Vlan manipulation"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to push or pop vlan headers.
If unsure, say N.
@@ -865,9 +865,9 @@ config NET_ACT_VLAN
module will be called act_vlan.
config NET_ACT_BPF
- tristate "BPF based action"
- depends on NET_CLS_ACT
- ---help---
+ tristate "BPF based action"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to execute BPF code on packets. The BPF code will decide
if the packet should be dropped or not.
@@ -877,10 +877,10 @@ config NET_ACT_BPF
module will be called act_bpf.
config NET_ACT_CONNMARK
- tristate "Netfilter Connection Mark Retriever"
- depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- ---help---
+ tristate "Netfilter Connection Mark Retriever"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+ ---help---
Say Y here to allow retrieving of conn mark
If unsure, say N.
@@ -889,10 +889,10 @@ config NET_ACT_CONNMARK
module will be called act_connmark.
config NET_ACT_CTINFO
- tristate "Netfilter Connection Mark Actions"
- depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- help
+ tristate "Netfilter Connection Mark Actions"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+ help
Say Y here to allow transfer of a connmark stored information.
Current actions transfer connmark stored DSCP into
ipv4/v6 diffserv and/or to transfer connmark to packet
@@ -906,21 +906,21 @@ config NET_ACT_CTINFO
module will be called act_ctinfo.
config NET_ACT_SKBMOD
- tristate "skb data modification action"
- depends on NET_CLS_ACT
- ---help---
- Say Y here to allow modification of skb data
+ tristate "skb data modification action"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to allow modification of skb data
- If unsure, say N.
+ If unsure, say N.
- To compile this code as a module, choose M here: the
- module will be called act_skbmod.
+ To compile this code as a module, choose M here: the
+ module will be called act_skbmod.
config NET_ACT_IFE
- tristate "Inter-FE action based on IETF ForCES InterFE LFB"
- depends on NET_CLS_ACT
- select NET_IFE
- ---help---
+ tristate "Inter-FE action based on IETF ForCES InterFE LFB"
+ depends on NET_CLS_ACT
+ select NET_IFE
+ ---help---
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
"Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -930,9 +930,9 @@ config NET_ACT_IFE
module will be called act_ife.
config NET_ACT_TUNNEL_KEY
- tristate "IP tunnel metadata manipulation"
- depends on NET_CLS_ACT
- ---help---
+ tristate "IP tunnel metadata manipulation"
+ depends on NET_CLS_ACT
+ ---help---
Say Y here to set/release ip tunnel metadata.
If unsure, say N.
@@ -941,9 +941,9 @@ config NET_ACT_TUNNEL_KEY
module will be called act_tunnel_key.
config NET_ACT_CT
- tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
- help
+ tristate "connection tracking tc action"
+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
+ help
Say Y here to allow sending the packets to conntrack module.
If unsure, say N.
@@ -952,21 +952,20 @@ config NET_ACT_CT
module will be called act_ct.
config NET_IFE_SKBMARK
- tristate "Support to encoding decoding skb mark on IFE action"
- depends on NET_ACT_IFE
+ tristate "Support to encoding decoding skb mark on IFE action"
+ depends on NET_ACT_IFE
config NET_IFE_SKBPRIO
- tristate "Support to encoding decoding skb prio on IFE action"
- depends on NET_ACT_IFE
+ tristate "Support to encoding decoding skb prio on IFE action"
+ depends on NET_ACT_IFE
config NET_IFE_SKBTCINDEX
- tristate "Support to encoding decoding skb tcindex on IFE action"
- depends on NET_ACT_IFE
+ tristate "Support to encoding decoding skb tcindex on IFE action"
+ depends on NET_ACT_IFE
config NET_TC_SKB_EXT
bool "TC recirculation support"
depends on NET_CLS_ACT
- default y if NET_CLS_ACT
select SKB_EXTENSIONS
help
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 339712296164..2558f00f6b3e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -831,6 +831,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
+static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
+ [TCA_ACT_KIND] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ - 1 },
+ [TCA_ACT_INDEX] = { .type = NLA_U32 },
+ [TCA_ACT_COOKIE] = { .type = NLA_BINARY,
+ .len = TC_COOKIE_MAX_SIZE },
+ [TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
+};
+
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -846,8 +855,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
if (name == NULL) {
- err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL,
- extack);
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+ tcf_action_policy, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
@@ -856,18 +865,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
}
- if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
- NL_SET_ERR_MSG(extack, "TC action name too long");
- goto err_out;
- }
- if (tb[TCA_ACT_COOKIE]) {
- int cklen = nla_len(tb[TCA_ACT_COOKIE]);
-
- if (cklen > TC_COOKIE_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
- goto err_out;
- }
+ nla_strlcpy(act_name, kind, IFNAMSIZ);
+ if (tb[TCA_ACT_COOKIE]) {
cookie = nla_memdup_cookie(tb);
if (!cookie) {
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
@@ -1098,7 +1098,8 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack);
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+ tcf_action_policy, extack);
if (err < 0)
goto err_out;
@@ -1152,7 +1153,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
b = skb_tail_pointer(skb);
- err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack);
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+ tcf_action_policy, extack);
if (err < 0)
goto err_out;
@@ -1440,7 +1442,7 @@ static struct nlattr *find_dump_kind(struct nlattr **nla)
if (tb[1] == NULL)
return NULL;
- if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0)
+ if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 692c4c9040fd..514456a0b9a8 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -146,6 +146,7 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev)
case ARPHRD_TUNNEL6:
case ARPHRD_SIT:
case ARPHRD_IPGRE:
+ case ARPHRD_IP6GRE:
case ARPHRD_VOID:
case ARPHRD_NONE:
return false;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 32577c248968..64584a1df425 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2894,8 +2894,10 @@ out:
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
- kfree(exts->actions);
+ if (exts->actions) {
+ tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
+ kfree(exts->actions);
+ }
exts->nr_actions = 0;
#endif
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1047825d9f48..81d58b280612 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1390,7 +1390,8 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
}
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
- [TCA_KIND] = { .type = NLA_STRING },
+ [TCA_KIND] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ - 1 },
[TCA_RATE] = { .type = NLA_BINARY,
.len = sizeof(struct tc_estimator) },
[TCA_STAB] = { .type = NLA_NESTED },
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 93b58fde99b7..1bef152c5721 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -392,7 +392,6 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- int err;
if (!opt) {
NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
@@ -404,6 +403,10 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
if (!q->qdisc)
return -ENOMEM;
+ spin_lock(&cbs_list_lock);
+ list_add(&q->cbs_list, &cbs_list);
+ spin_unlock(&cbs_list_lock);
+
qdisc_hash_add(q->qdisc, false);
q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
@@ -413,17 +416,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
- err = cbs_change(sch, opt, extack);
- if (err)
- return err;
-
- if (!q->offload) {
- spin_lock(&cbs_list_lock);
- list_add(&q->cbs_list, &cbs_list);
- spin_unlock(&cbs_list_lock);
- }
-
- return 0;
+ return cbs_change(sch, opt, extack);
}
static void cbs_destroy(struct Qdisc *sch)
@@ -431,15 +424,18 @@ static void cbs_destroy(struct Qdisc *sch)
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- spin_lock(&cbs_list_lock);
- list_del(&q->cbs_list);
- spin_unlock(&cbs_list_lock);
+ /* Nothing to do if we couldn't create the underlying qdisc */
+ if (!q->qdisc)
+ return;
qdisc_watchdog_cancel(&q->watchdog);
cbs_disable_offload(dev, q);
- if (q->qdisc)
- qdisc_put(q->qdisc);
+ spin_lock(&cbs_list_lock);
+ list_del(&q->cbs_list);
+ spin_unlock(&cbs_list_lock);
+
+ qdisc_put(q->qdisc);
}
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7bcf20ef9145..8184c87da8be 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1302,6 +1302,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct htb_class *cl = (struct htb_class *)*arg, *parent;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_HTB_MAX + 1];
+ struct Qdisc *parent_qdisc = NULL;
struct tc_htb_opt *hopt;
u64 rate64, ceil64;
int warn = 0;
@@ -1401,7 +1402,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (parent && !parent->level) {
/* turn parent into inner node */
qdisc_purge_queue(parent->leaf.q);
- qdisc_put(parent->leaf.q);
+ parent_qdisc = parent->leaf.q;
if (parent->prio_activity)
htb_deactivate(q, parent);
@@ -1480,6 +1481,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
sch_tree_unlock(sch);
+ qdisc_put(parent_qdisc);
if (warn)
pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e1087746f6a2..b2b7fdb06fc6 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -174,7 +174,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
{
struct multiq_sched_data *q = qdisc_priv(sch);
struct tc_multiq_qopt *qopt;
- int i;
+ struct Qdisc **removed;
+ int i, n_removed = 0;
if (!netif_is_multiqueue(qdisc_dev(sch)))
return -EOPNOTSUPP;
@@ -185,6 +186,11 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+ removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+ GFP_KERNEL);
+ if (!removed)
+ return -ENOMEM;
+
sch_tree_lock(sch);
q->bands = qopt->bands;
for (i = q->bands; i < q->max_bands; i++) {
@@ -192,13 +198,17 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
- qdisc_tree_flush_backlog(child);
- qdisc_put(child);
+ qdisc_purge_queue(child);
+ removed[n_removed++] = child;
}
}
sch_tree_unlock(sch);
+ for (i = 0; i < n_removed; i++)
+ qdisc_put(removed[i]);
+ kfree(removed);
+
for (i = 0; i < q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
struct Qdisc *child, *old;
@@ -213,11 +223,10 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
if (child != &noop_qdisc)
qdisc_hash_add(child, true);
- if (old != &noop_qdisc) {
- qdisc_tree_flush_backlog(old);
- qdisc_put(old);
- }
+ if (old != &noop_qdisc)
+ qdisc_purge_queue(old);
sch_tree_unlock(sch);
+ qdisc_put(old);
}
}
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b17f2ed970e2..0e44039e729c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -476,7 +476,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* skb will be queued.
*/
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
- struct Qdisc *rootq = qdisc_root(sch);
+ struct Qdisc *rootq = qdisc_root_bh(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
@@ -777,7 +777,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
struct disttable *d;
int i;
- if (n > NETEM_DIST_MAX)
+ if (!n || n > NETEM_DIST_MAX)
return -EINVAL;
d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1dff8506a715..d448fe3068e5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -488,7 +488,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
- struct Qdisc *child;
+ struct Qdisc *child, *old;
struct nlattr *tb[TCA_SFB_MAX + 1];
const struct tc_sfb_qopt *ctl = &sfb_default_ops;
u32 limit;
@@ -518,8 +518,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
sch_tree_lock(sch);
- qdisc_tree_flush_backlog(q->qdisc);
- qdisc_put(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
+ old = q->qdisc;
q->qdisc = child;
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
@@ -542,6 +542,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
sfb_init_perturbation(1, q);
sch_tree_unlock(sch);
+ qdisc_put(old);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e5f2fc726a98..dd860fea0148 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -215,7 +215,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
rcu_read_lock();
res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
- tclass);
+ tclass, sk->sk_priority);
rcu_read_unlock();
return res;
}
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bba3104f128f..16d5f353163a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -376,8 +376,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- headroom = ALIGN(headroom, 64);
-
size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
if (size_chk < 0)
return -EINVAL;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 715967762312..ede55fec3618 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -48,6 +48,8 @@ struct btf_dump_type_aux_state {
__u8 fwd_emitted: 1;
/* whether unique non-duplicate name was already assigned */
__u8 name_resolved: 1;
+ /* whether type is referenced from any other type */
+ __u8 referenced: 1;
};
struct btf_dump {
@@ -173,6 +175,7 @@ void btf_dump__free(struct btf_dump *d)
free(d);
}
+static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
@@ -213,6 +216,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
/* VOID is special */
d->type_states[0].order_state = ORDERED;
d->type_states[0].emit_state = EMITTED;
+
+ /* eagerly determine referenced types for anon enums */
+ err = btf_dump_mark_referenced(d);
+ if (err)
+ return err;
}
d->emit_queue_cnt = 0;
@@ -226,6 +234,79 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
return 0;
}
+/*
+ * Mark all types that are referenced from any other type. This is used to
+ * determine top-level anonymous enums that need to be emitted as
+ * independent type declarations.
+ * Anonymous enums come in two flavors: either embedded in a struct's field
+ * definition, in which case they have to be declared inline as part of field
+ * type declaration; or as a top-level anonymous enum, typically used for
+ * declaring global constants. It's impossible to distinguish between the two
+ * without knowing whether given enum type was referenced from other type:
+ * top-level anonymous enum won't be referenced by anything, while embedded
+ * one will.
+ */
+static int btf_dump_mark_referenced(struct btf_dump *d)
+{
+ int i, j, n = btf__get_nr_types(d->btf);
+ const struct btf_type *t;
+ __u16 vlen;
+
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(d->btf, i);
+ vlen = btf_vlen(t);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ break;
+
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ d->type_states[t->type].referenced = 1;
+ break;
+
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+
+ d->type_states[a->index_type].referenced = 1;
+ d->type_states[a->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+
+ for (j = 0; j < vlen; j++, m++)
+ d->type_states[m->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+
+ for (j = 0; j < vlen; j++, p++)
+ d->type_states[p->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_DATASEC: {
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+ for (j = 0; j < vlen; j++, v++)
+ d->type_states[v->type].referenced = 1;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
{
__u32 *new_queue;
@@ -395,7 +476,12 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
}
case BTF_KIND_ENUM:
case BTF_KIND_FWD:
- if (t->name_off != 0) {
+ /*
+ * non-anonymous or non-referenced enums are top-level
+ * declarations and should be emitted. Same logic can be
+ * applied to FWDs, it won't hurt anyways.
+ */
+ if (t->name_off != 0 || !tstate->referenced) {
err = btf_dump_add_emit_queue_id(d, id);
if (err)
return err;
@@ -536,11 +622,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
t = btf__type_by_id(d->btf, id);
kind = btf_kind(t);
- if (top_level_def && t->name_off == 0) {
- pr_warning("unexpected nameless definition, id:[%u]\n", id);
- return;
- }
-
if (tstate->emit_state == EMITTING) {
if (tstate->fwd_emitted)
return;
@@ -1167,6 +1248,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
return;
}
+ next_id = decls->ids[decls->cnt - 1];
next_t = btf__type_by_id(d->btf, next_id);
multidim = btf_is_array(next_t);
/* we need space if we have named non-pointer */
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 842c4fd55859..24fa313524fb 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -65,7 +65,6 @@ struct xsk_socket {
int xsks_map_fd;
__u32 queue_id;
char ifname[IFNAMSIZ];
- bool zc;
};
struct xsk_nl_info {
@@ -491,7 +490,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
- struct xdp_options opts;
struct xsk_socket *xsk;
socklen_t optlen;
int err;
@@ -611,15 +609,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->prog_fd = -1;
- optlen = sizeof(opts);
- err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
- if (err) {
- err = -errno;
- goto out_mmap_tx;
- }
-
- xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
-
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
if (err)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index fdc0b3614a9e..a82da555b1b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -203,14 +203,24 @@ static int start_server(void)
return fd;
}
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+
static void *server_thread(void *arg)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int fd = *(int *)arg;
int client_fd;
+ int err;
+
+ err = listen(fd, 1);
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_signal(&server_started);
+ pthread_mutex_unlock(&server_started_mtx);
- if (CHECK_FAIL(listen(fd, 1)) < 0) {
+ if (CHECK_FAIL(err < 0)) {
perror("Failed to listed on socket");
return NULL;
}
@@ -248,7 +258,14 @@ void test_tcp_rtt(void)
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+ if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
+ (void *)&server_fd)))
+ goto close_cgroup_fd;
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_wait(&server_started, &server_started_mtx);
+ pthread_mutex_unlock(&server_started_mtx);
+
CHECK_FAIL(run_test(cgroup_fd, server_fd));
close(server_fd);
close_cgroup_fd:
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 8a399bdfd920..067eb625d01c 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -413,7 +413,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
#else
#pragma unroll
#endif
- for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
+ for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
+ if (i >= map.cnt)
+ break;
+
descr->key_lens[i] = 0;
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].key);
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 4f8ec1f10a80..a320e3844b17 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1385,7 +1385,6 @@ static int fixup_sysctl_value(const char *buf, size_t buf_len,
uint8_t raw[sizeof(uint64_t)];
uint64_t num;
} value = {};
- uint8_t c, i;
if (buf_len > sizeof(value)) {
log_err("Value is too big (%zd) to use in fixup", buf_len);
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index 5dcdfa20fc6c..126caf28b529 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -224,13 +224,6 @@ ingress_vlan_filter_test()
local vid=10
bridge vlan add vid $vid dev $swp2 master
- # During initialization the firmware enables all the VLAN filters and
- # the driver does not turn them off since the traffic will be discarded
- # by the STP filter whose default is DISCARD state. Add the VID on the
- # ingress bridge port and then remove it to make sure it is not member
- # in the VLAN.
- bridge vlan add vid $vid dev $swp1 master
- bridge vlan del vid $vid dev $swp1 master
RET=0
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
index 020c44f49a9e..f2f7ec0a99b4 100644
--- a/tools/testing/selftests/membarrier/.gitignore
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -1 +1,2 @@
-membarrier_test
+membarrier_test_multi_thread
+membarrier_test_single_thread
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index 97e3bdf3d1e9..34d1c81a2324 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g -I../../../../usr/include/
+LDLIBS += -lpthread
-TEST_GEN_PROGS := membarrier_test
+TEST_GEN_PROGS := membarrier_test_single_thread \
+ membarrier_test_multi_thread
include ../lib.mk
-
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index 70b4ddbf126b..186be69f0a59 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -1,10 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <syscall.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <pthread.h>
#include "../kselftest.h"
@@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void)
return 0;
}
-static int test_membarrier(void)
+static int test_membarrier_fail(void)
{
int status;
@@ -233,10 +234,27 @@ static int test_membarrier(void)
status = test_membarrier_flags_fail();
if (status)
return status;
- status = test_membarrier_global_success();
+ status = test_membarrier_private_expedited_fail();
if (status)
return status;
- status = test_membarrier_private_expedited_fail();
+ status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+ if (status < 0) {
+ ksft_test_result_fail("sys_membarrier() failed\n");
+ return status;
+ }
+ if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+ status = test_membarrier_private_expedited_sync_core_fail();
+ if (status)
+ return status;
+ }
+ return 0;
+}
+
+static int test_membarrier_success(void)
+{
+ int status;
+
+ status = test_membarrier_global_success();
if (status)
return status;
status = test_membarrier_register_private_expedited_success();
@@ -251,9 +269,6 @@ static int test_membarrier(void)
return status;
}
if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
- status = test_membarrier_private_expedited_sync_core_fail();
- if (status)
- return status;
status = test_membarrier_register_private_expedited_sync_core_success();
if (status)
return status;
@@ -300,14 +315,3 @@ static int test_membarrier_query(void)
ksft_test_result_pass("sys_membarrier available\n");
return 0;
}
-
-int main(int argc, char **argv)
-{
- ksft_print_header();
- ksft_set_plan(13);
-
- test_membarrier_query();
- test_membarrier();
-
- return ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
new file mode 100644
index 000000000000..ac5613e5b0eb
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+static int thread_ready, thread_quit;
+static pthread_mutex_t test_membarrier_thread_mutex =
+ PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t test_membarrier_thread_cond =
+ PTHREAD_COND_INITIALIZER;
+
+/*
+ * Helper thread body: set thread_ready and broadcast on the condition
+ * variable, then block until thread_quit becomes non-zero.
+ * @arg is unused. Always returns NULL.
+ */
+void *test_membarrier_thread(void *arg)
+{
+ /* announce that this thread is running */
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ thread_ready = 1;
+ pthread_cond_broadcast(&test_membarrier_thread_cond);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ /* stay alive until thread_quit is set; the loop re-checks after each wakeup */
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ while (!thread_quit)
+ pthread_cond_wait(&test_membarrier_thread_cond,
+ &test_membarrier_thread_mutex);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ return NULL;
+}
+
+/*
+ * Run the membarrier failure and success tests while a second thread of
+ * this process is alive.
+ *
+ * A helper thread is started and the tests begin only once it has set
+ * thread_ready; afterwards the helper is told to quit and is joined.
+ * If the helper cannot be created the test program exits with a failure
+ * message, because waiting for thread_ready would otherwise never return.
+ *
+ * Returns 0.
+ */
+static int test_mt_membarrier(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	err = pthread_create(&test_thread, NULL,
+			     test_membarrier_thread, NULL);
+	if (err)
+		ksft_exit_fail_msg("pthread_create() failed: %s\n",
+				   strerror(err));
+
+	/* wait until the helper thread reports that it is running */
+	pthread_mutex_lock(&test_membarrier_thread_mutex);
+	while (!thread_ready)
+		pthread_cond_wait(&test_membarrier_thread_cond,
+				  &test_membarrier_thread_mutex);
+	pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+	test_membarrier_fail();
+
+	test_membarrier_success();
+
+	/* release the helper thread and reap it */
+	pthread_mutex_lock(&test_membarrier_thread_mutex);
+	thread_quit = 1;
+	pthread_cond_broadcast(&test_membarrier_thread_cond);
+	pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+	pthread_join(test_thread, NULL);
+
+	return 0;
+}
+
+/*
+ * Entry point of the multi-threaded membarrier selftest: print the
+ * kselftest header, declare a plan of 13 results, run the query test and
+ * then the multi-threaded test. The status returned by the test helpers is
+ * not examined here. argc/argv are unused.
+ */
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(13);
+
+ test_membarrier_query();
+
+ /* Multi-threaded */
+ test_mt_membarrier();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
new file mode 100644
index 000000000000..c1c963902854
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+/*
+ * Entry point of the single-threaded membarrier selftest: print the
+ * kselftest header, declare a plan of 13 results, then run the query,
+ * failure and success tests in that order. The status returned by the test
+ * helpers is not examined here. argc/argv are unused.
+ */
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(13);
+
+ test_membarrier_query();
+
+ test_membarrier_fail();
+
+ test_membarrier_success();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index e6828732843e..9dc35a16e415 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -15,6 +15,8 @@
PAUSE_ON_FAIL=no
VERBOSE=0
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
################################################################################
# helpers
@@ -200,7 +202,7 @@ validate_v6_exception()
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
@@ -243,7 +245,7 @@ do
run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
[ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
- run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+ run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
[ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
[ $ret -ne 0 ] && break
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index f9ebeac1e6f2..796670ebc65b 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -940,6 +940,20 @@ basic()
run_cmd "$IP nexthop add id 104 group 1 dev veth1"
log_test $? 2 "Nexthop group and device"
+ # Tests to ensure that flushing works as expected.
+ run_cmd "$IP nexthop add id 105 blackhole proto 99"
+ run_cmd "$IP nexthop add id 106 blackhole proto 100"
+ run_cmd "$IP nexthop add id 107 blackhole proto 99"
+ run_cmd "$IP nexthop flush proto 99"
+ check_nexthop "id 105" ""
+ check_nexthop "id 106" "id 106 blackhole proto 100"
+ check_nexthop "id 107" ""
+ run_cmd "$IP nexthop flush proto 100"
+ check_nexthop "id 106" ""
+
+ run_cmd "$IP nexthop flush proto 100"
+ log_test $? 0 "Test proto flush"
+
run_cmd "$IP nexthop add id 104 group 1 blackhole"
log_test $? 2 "Nexthop group and blackhole"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 4465fc2dae14..c4ba0ff4a53f 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -17,6 +17,8 @@ PAUSE=no
IP="ip -netns ns1"
NS_EXEC="ip netns exec ns1"
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
log_test()
{
local rc=$1
@@ -614,6 +616,20 @@ fib_nexthop_test()
cleanup
}
+# Exercise an IPv6 FIB rule with suppress_prefixlength while traffic is
+# being sent. There is no functional check: the test passes if the kernel
+# survives the flood ping.
+fib_suppress_test()
+{
+	$IP link add dummy1 type dummy
+	$IP link set dummy1 up
+	$IP -6 route add default dev dummy1
+	$IP -6 rule add table main suppress_prefixlength 0
+	# Ping from inside ns1: $IP installed the route and the rule there, so
+	# a ping from the caller's namespace would never hit them.
+	$NS_EXEC ${ping6} -f -c 1000 -W 1 1234::1 || true
+	$IP -6 rule del table main suppress_prefixlength 0
+	$IP link del dummy1
+
+	# If we got here without crashing, we're good.
+	return 0
+}
+
################################################################################
# Tests on route add and replace
@@ -1086,7 +1102,7 @@ ipv6_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
- run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+ run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
@@ -1591,6 +1607,7 @@ do
fib_carrier_test|carrier) fib_carrier_test;;
fib_rp_filter_test|rp_filter) fib_rp_filter_test;;
fib_nexthop_test|nexthop) fib_nexthop_test;;
+ fib_suppress_test|suppress) fib_suppress_test;;
ipv6_route_test|ipv6_rt) ipv6_route_test;;
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index f1fbc15800c4..ed1565809d2b 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -4,6 +4,7 @@ noarg:
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
large_vm_fork_separation
+TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
@@ -19,3 +20,4 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000000..9868a5ddd847
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ *
+ * Test tlbie/mtpidr race. We have 4 threads doing a flush/load/compare/store
+ * sequence in a loop. The same threads also run a context switch task
+ * that does sched_yield() in a loop.
+ *
+ * The snapshot thread marks the mmap area PROT_READ in between, makes a copy
+ * and copies it back to the original area. This helps us to detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+/*
+ * Execute a dcbf instruction on the byte at @addr, followed by a sync.
+ * The "memory" clobber stops the compiler from moving memory accesses
+ * across the asm statement.
+ */
+static inline void dcbf(volatile unsigned int *addr)
+{
+ __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+/*
+ * Print @msg and the current time between two banner lines on stdout,
+ * then terminate the process with exit status 1. Does not return.
+ */
+static void err_msg(char *msg)
+{
+	static const char banner[] = "=================================\n";
+	time_t stamp = time(NULL);
+
+	printf("%s", banner);
+	printf(" Error: %s\n", msg);
+	printf(" %s", ctime(&stamp));
+	printf("%s", banner);
+	exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ * 1) FLUSH the contents of that word.
+ * 2) LOAD the contents of that word.
+ * 3) COMPARE the contents of that word with the content that was
+ * previously stored at that word
+ * 4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs about it to all the
+ * other rim_threads, and the mem_snapshot thread.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ *
+ */
+#define MAX_THREADS 64
+#define THREAD_ID_BITS 8
+#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE 1024
+#define BITS_PER_BYTE 8
+#define WORD_SIZE (sizeof(unsigned int))
+#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE)
+
+/* Return the address of the first byte of the chunk owned by @thread_id. */
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+	unsigned int byte_offset = thread_id * RIM_CHUNK_SIZE;
+
+	return (char *)((unsigned long)map1 + byte_offset);
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1)
+
+/* Return how many words lie between @start and @addr. */
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+	unsigned int byte_delta = (unsigned long)addr - (unsigned long)start;
+
+	return byte_delta / WORD_SIZE;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ * - The thread-id of the rim_thread performing the store
+ * (The most significant THREAD_ID_BITS)
+ *
+ * - The word-offset of the address into which the store is being
+ * performed (The next WORD_OFFSET_BITS)
+ *
+ * - The sweep_id of the current sweep in which the store is
+ * being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * | Thread id | Word offset | sweep_id |
+ * |------------------|--------------------|---------------------------------|
+ * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ * address == map1 +
+ * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT 0
+#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS)
+
+/*
+ * Build the store pattern that thread @tid writes to @addr during the
+ * sweep @sweep_id: the thread id, the word offset of @addr within the
+ * thread's chunk and the sweep id, each masked and shifted into its field.
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+						 unsigned int *addr,
+						 unsigned int sweep_id)
+{
+	char *chunk = compute_chunk_start_addr(tid);
+	unsigned int offset = compute_word_offset(chunk, addr);
+	unsigned int tid_field = (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+	unsigned int offset_field = (offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+	unsigned int sweep_field = (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+
+	return tid_field + offset_field + sweep_field;
+}
+
+/* Return the thread-id field of the store-pattern @pattern. */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+	return (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+}
+
+/* Return the word-offset field of the store-pattern @pattern. */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+	return (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+}
+
+/* Return the sweep-id field of the store-pattern @pattern. */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+{
+	return (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+}
+
+/************************************************************
+ * *
+ * Logging the output of the verification *
+ * *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] ="Thread-%02d-Chunk";
+
+/*
+ * Create the log file of thread @tid inside logdir, remember its stream in
+ * fp[tid] and write a header describing the chunk and the sweep state.
+ * Exits through err_msg() if the file cannot be created.
+ */
+static inline void start_verification_log(unsigned int tid,
+					  unsigned int *addr,
+					  unsigned int cur_sweep_id,
+					  unsigned int prev_sweep_id)
+{
+	char name[30];
+	char path[LOGDIR_NAME_SIZE + 30];
+	char *chunk_start = compute_chunk_start_addr(tid);
+	unsigned int size = RIM_CHUNK_SIZE;
+	FILE *out;
+
+	/* path = logdir + "/" + per-thread file name */
+	sprintf(name, logfilename, tid);
+	snprintf(path, sizeof(path), "%s/%s", logdir, name);
+
+	out = fopen(path, "w");
+	if (!out)
+		err_msg("Unable to create logfile\n");
+
+	fp[tid] = out;
+
+	fprintf(out, "----------------------------------------------------------\n");
+	fprintf(out, "PID = %d\n", rim_process_pid);
+	fprintf(out, "Thread id = %02d\n", tid);
+	fprintf(out, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
+	fprintf(out, "Chunk Size = %d\n", size);
+	fprintf(out, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
+	fprintf(out, "Current sweep-id = 0x%08x\n", cur_sweep_id);
+	fprintf(out, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
+	fprintf(out, "----------------------------------------------------------\n");
+}
+
+/*
+ * Append one mismatch record to the log of thread @tid: the address, the
+ * raw expected/observed words and each of their decoded fields.
+ */
+static inline void log_anamoly(unsigned int tid, unsigned int *addr,
+			       unsigned int expected, unsigned int observed)
+{
+	FILE *out = fp[tid];
+	unsigned int exp_tid = extract_tid(expected);
+	unsigned int obs_tid = extract_tid(observed);
+	unsigned int exp_off = extract_word_offset(expected);
+	unsigned int obs_off = extract_word_offset(observed);
+	unsigned int exp_sweep = extract_sweep_id(expected);
+	unsigned int obs_sweep = extract_sweep_id(observed);
+
+	fprintf(out, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+		tid, (unsigned long)addr, expected, observed);
+	fprintf(out, "Thread %02d: Expected Thread id = %02d\n", tid, exp_tid);
+	fprintf(out, "Thread %02d: Observed Thread id = %02d\n", tid, obs_tid);
+	fprintf(out, "Thread %02d: Expected Word offset = %03d\n", tid, exp_off);
+	fprintf(out, "Thread %02d: Observed Word offset = %03d\n", tid, obs_off);
+	fprintf(out, "Thread %02d: Expected sweep-id = 0x%x\n", tid, exp_sweep);
+	fprintf(out, "Thread %02d: Observed sweep-id = 0x%x\n", tid, obs_sweep);
+	fprintf(out, "----------------------------------------------------------\n");
+}
+
+/*
+ * Close the log of thread @tid. If @nr_anamolies is zero the log file is
+ * removed; otherwise a summary line naming the file is printed on stdout.
+ */
+static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
+{
+	FILE *f = fp[tid];
+	char logfile[30];
+	char path[LOGDIR_NAME_SIZE + 30];
+	char separator[] = "/";
+
+	fclose(f);
+
+	/*
+	 * Build the path before it is used: both the remove() below and the
+	 * summary message need it.
+	 */
+	sprintf(logfile, logfilename, tid);
+	strcpy(path, logdir);
+	strcat(path, separator);
+	strcat(path, logfile);
+
+	if (nr_anamolies == 0) {
+		remove(path);
+		return;
+	}
+
+	/* nr_anamolies is unsigned, hence %u */
+	printf("Thread %02d chunk has %u corrupted words. For details check %s\n",
+		tid, nr_anamolies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else by setting the corruption_found variable. On seeing this,
+ * every thread verifies the content of its chunk as follows.
+ *
+ * Suppose a thread identified with @tid was about to store (but not
+ * yet stored) to @next_store_addr in its current sweep identified by
+ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | cur_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | prev_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anomalies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ unsigned int *iter_ptr;
+ unsigned int size = RIM_CHUNK_SIZE;
+ unsigned int expected;
+ unsigned int observed;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ int nr_anamolies = 0;
+
+ start_verification_log(tid, next_store_addr,
+ cur_sweep_id, prev_sweep_id);
+
+ /* walk every word of the chunk */
+ for (iter_ptr = (unsigned int *)chunk_start;
+ (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+ iter_ptr++) {
+ unsigned int expected_sweep_id;
+
+ /* words below next_store_addr were already stored in the current sweep */
+ if (iter_ptr < next_store_addr) {
+ expected_sweep_id = cur_sweep_id;
+ } else {
+ expected_sweep_id = prev_sweep_id;
+ }
+
+ expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+ dcbf((volatile unsigned int*)iter_ptr); //Flush before reading
+ observed = *iter_ptr;
+
+ if (observed != expected) {
+ nr_anamolies++;
+ log_anamoly(tid, iter_ptr, expected, observed);
+ }
+ }
+
+ /* closes the log; the callee decides what to do based on the count */
+ end_verification_log(tid, nr_anamolies);
+}
+
+/*
+ * Restrict thread @th to run on CPU @cpu only. The result of
+ * pthread_setaffinity_np() is not checked.
+ */
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ /* the "0 &&" keeps the SCHED_FIFO switch compiled in but never executed */
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ /* haven't reproduced with this setting, it kills random preemption which may be a factor */
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+/*
+ * Restrict the calling thread (pid 0 is passed to sched_setaffinity()) to
+ * run on CPU @cpu only. The result of sched_setaffinity() is not checked.
+ */
+static void set_mycpu(int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ /* the "0 &&" keeps the SCHED_FIFO switch compiled in but never executed */
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static volatile int segv_wait;
+
+/*
+ * SIGSEGV handler: yield the CPU repeatedly for as long as segv_wait is
+ * non-zero, then return. None of the arguments are used.
+ */
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+ while (segv_wait) {
+ sched_yield();
+ }
+
+}
+
+/*
+ * Install segv_handler() as the SIGSEGV handler with SA_SIGINFO semantics.
+ * Exits the process with EXIT_FAILURE if sigaction() fails.
+ */
+static void set_segv_handler(void)
+{
+	struct sigaction sa;
+
+	/*
+	 * Give every field a defined value before handing the struct to the
+	 * kernel; in particular sa_mask must not be stack garbage.
+	 */
+	memset(&sa, 0, sizeof(sa));
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = segv_handler;
+
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ *
+ * @arg points to the unsigned int thread id of the calling rim_thread.
+ * The function returns a null pointer in all cases: either after
+ * verify_chunk() has run because corruption_found or timeout was seen in
+ * the middle of a sweep, or when the outer loop sees one of them between
+ * sweeps.
+ */
+static void *rim_fn(void *arg)
+{
+ unsigned int tid = *((unsigned int *)arg);
+
+ int size = RIM_CHUNK_SIZE;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ unsigned int prev_sweep_id;
+ unsigned int cur_sweep_id = 0;
+
+ /* word access */
+ unsigned int pattern = cur_sweep_id;
+ unsigned int *pattern_ptr = &pattern;
+ unsigned int *w_ptr, read_data;
+
+ set_segv_handler();
+
+ /*
+ * Let us initialize the chunk:
+ *
+ * Each word-aligned address addr in the chunk,
+ * is initialized to :
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | 0 |
+ * |-------------------------------------------------|
+ */
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+ *w_ptr = *pattern_ptr;
+ }
+
+ while (!corruption_found && !timeout) {
+ prev_sweep_id = cur_sweep_id;
+ cur_sweep_id = cur_sweep_id + 1;
+
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+ unsigned int old_pattern;
+
+ /*
+ * Compute the pattern that we would have
+ * stored at this location in the previous
+ * sweep.
+ */
+ old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+ /*
+ * FLUSH:Ensure that we flush the contents of
+ * the cache before loading
+ */
+ dcbf((volatile unsigned int*)w_ptr); //Flush
+
+ /* LOAD: Read the value */
+ read_data = *w_ptr; //Load
+
+ /*
+ * COMPARE: Is it the same as what we had stored
+ * in the previous sweep ? It better be!
+ */
+ if (read_data != old_pattern) {
+ /* No it isn't! Tell everyone */
+ corruption_found = 1;
+ }
+
+ /*
+ * Before performing a store, let us check if
+ * any rim_thread has found a corruption.
+ */
+ if (corruption_found || timeout) {
+ /*
+ * Either someone (including us!) has found
+ * a corruption :( or the run has timed out.
+ *
+ * Let us verify that our chunk is
+ * correct.
+ */
+ /*
+ * NOTE(review): the original comment here spoke of letting
+ * "the dust settle" first, but no delay is performed before
+ * verify_chunk() - confirm whether one was intended.
+ */
+ verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+ return 0;
+ }
+
+ /*
+ * Compute the new pattern that we are going
+ * to write to this location
+ */
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+ /*
+ * STORE: Now let us write this pattern into
+ * the location
+ */
+ *w_ptr = *pattern_ptr;
+ }
+ }
+
+ return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+/*
+ * Body of the snapshot thread.
+ *
+ * Until a corruption is flagged or the timeout fires, repeatedly:
+ * write-protect the first page of map1, copy that page into a private
+ * scratch buffer, copy it back through the map2 alias, then restore
+ * write permission on map1.
+ *
+ * @arg: unused.
+ *
+ * Returns NULL. Exits the process if the scratch buffer cannot be
+ * allocated, since the test cannot do anything useful without it.
+ */
+static void *mem_snapshot_fn(void *arg)
+{
+	int page_size = getpagesize();
+	size_t size = page_size;
+	void *tmp = malloc(size);
+
+	/* Fail loudly instead of handing a NULL buffer to memcpy() below. */
+	if (!tmp) {
+		perror("malloc");
+		exit(1);
+	}
+
+	while (!corruption_found && !timeout) {
+		/*
+		 * Raise segv_wait for the duration of the snapshot.
+		 * NOTE(review): presumably the SEGV handler installed by
+		 * the rim threads waits while this flag is set - confirm.
+		 */
+		segv_wait = 1;
+
+		mprotect(map1, size, PROT_READ);
+
+		/*
+		 * Load from the working alias (map1). Loading from map2
+		 * also fails.
+		 */
+		memcpy(tmp, map1, size);
+
+		/*
+		 * Stores must go via map2 which has write permissions, but
+		 * the corrupted data tends to be seen in the snapshot buffer,
+		 * so corruption does not appear to be introduced at the
+		 * copy-back via map2 alias here.
+		 */
+		memcpy(map2, tmp, size);
+		/*
+		 * Before releasing other threads, must ensure the copy
+		 * back through map2 has completed, hence the sync barriers
+		 * around restoring write permission on map1.
+		 */
+		asm volatile("sync" ::: "memory");
+		mprotect(map1, size, PROT_READ|PROT_WRITE);
+		asm volatile("sync" ::: "memory");
+		segv_wait = 0;
+
+		usleep(1); /* This value makes a big difference */
+	}
+
+	/* The scratch buffer is private to this thread; release it. */
+	free(tmp);
+
+	return NULL;
+}
+
+/*
+ * Signal handler that raises the global timeout flag. main() installs
+ * it for SIGALRM, and the worker and snapshot loops poll the flag to
+ * decide when to stop.
+ */
+void alrm_sighandler(int sig)
+{
+	(void)sig;	/* the signal number is not needed */
+	timeout = 1;
+}
+
+/*
+ * Test driver.
+ *
+ * Options:
+ *   -r <start_cpu>   first CPU number used for the workers (default 0)
+ *   -n <nrthreads>   number of rim threads, capped at MAX_THREADS (default 4)
+ *   -l <logdir>      log directory (default /tmp/logdir-<pid>)
+ *   -t <timeout>     run time in seconds (default 20 minutes)
+ *   -h               print usage and exit
+ *
+ * Attaches one page of SysV shared memory twice (map1 and map2), forks
+ * one yielding child per worker, starts the rim threads and the snapshot
+ * thread, then waits for all of them.
+ *
+ * Returns 1 if the threads stopped before the timeout fired (a
+ * corruption was detected), 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int c;
+	int page_size = getpagesize();
+	time_t now;
+	int i, dir_error;
+	pthread_attr_t attr;
+	key_t shm_key = (key_t) getpid();
+	int shmid, run_time = 20 * 60;
+	struct sigaction sa_alrm;
+
+	snprintf(logdir, LOGDIR_NAME_SIZE,
+		 "/tmp/logdir-%u", (unsigned int)getpid());
+	while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+		switch(c) {
+		case 'r':
+			start_cpu = strtoul(optarg, NULL, 10);
+			break;
+		case 'h':
+			printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+			exit(0);
+			break;
+		case 'n':
+			nrthreads = strtoul(optarg, NULL, 10);
+			break;
+		case 'l':
+			/*
+			 * snprintf() always NUL-terminates; strncpy() would
+			 * leave logdir unterminated for an over-long argument.
+			 */
+			snprintf(logdir, LOGDIR_NAME_SIZE, "%s", optarg);
+			break;
+		case 't':
+			run_time = strtoul(optarg, NULL, 10);
+			break;
+		default:
+			printf("invalid option\n");
+			exit(0);
+			break;
+		}
+	}
+
+	if (nrthreads > MAX_THREADS)
+		nrthreads = MAX_THREADS;
+
+	/* One page of shared memory, attached at two different addresses. */
+	shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+	if (shmid < 0) {
+		err_msg("Failed shmget\n");
+	}
+
+	map1 = shmat(shmid, NULL, 0);
+	if (map1 == (void *) -1) {
+		err_msg("Failed shmat");
+	}
+
+	map2 = shmat(shmid, NULL, 0);
+	if (map2 == (void *) -1) {
+		err_msg("Failed shmat");
+	}
+
+	dir_error = mkdir(logdir, 0755);
+
+	if (dir_error) {
+		err_msg("Failed mkdir");
+	}
+
+	printf("start_cpu list:%lu\n", start_cpu);
+	printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+	printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+	printf("logdir at : %s\n", logdir);
+	printf("Timeout: %d seconds\n", run_time);
+
+	time(&now);
+	printf("=================================\n");
+	printf(" Starting Test\n");
+	printf(" %s", ctime(&now));
+	printf("=================================\n");
+
+	/*
+	 * Fork one child per worker that does nothing but yield.
+	 * PR_SET_PDEATHSIG makes the kernel send it SIGKILL when the parent
+	 * dies. NOTE(review): set_mycpu() presumably pins the child to the
+	 * given CPU - confirm against its definition.
+	 */
+	for (i = 0; i < nrthreads; i++) {
+		if (1 && !fork()) {
+			prctl(PR_SET_PDEATHSIG, SIGKILL);
+			set_mycpu(start_cpu + i);
+			for (;;)
+				sched_yield();
+			exit(0);
+		}
+	}
+
+
+	/* SIGALRM raises the timeout flag once run_time seconds have passed. */
+	sa_alrm.sa_handler = &alrm_sighandler;
+	sigemptyset(&sa_alrm.sa_mask);
+	sa_alrm.sa_flags = 0;
+
+	if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+		err_msg("Failed signal handler registration\n");
+	}
+
+	alarm(run_time);
+
+	pthread_attr_init(&attr);
+	for (i = 0; i < nrthreads; i++) {
+		rim_thread_ids[i] = i;
+		pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+		set_pthread_cpu(rim_threads[i], start_cpu + i);
+	}
+
+	/* i == nrthreads here, so the snapshot thread gets the next CPU number. */
+	pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+	set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+	pthread_join(mem_snapshot_thread, NULL);
+	for (i = 0; i < nrthreads; i++) {
+		pthread_join(rim_threads[i], NULL);
+	}
+
+	/* Threads stop on corruption or timeout; no timeout means corruption. */
+	if (!timeout) {
+		time(&now);
+		printf("=================================\n");
+		printf(" Data Corruption Detected\n");
+		printf(" %s", ctime(&now));
+		printf(" See logfiles in %s\n", logdir);
+		printf("=================================\n");
+		return 1;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 951fe855f7cd..98f2708d86cc 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -17,3 +17,4 @@ tm-vmx-unavail
tm-unavailable
tm-trap
tm-sigreturn
+tm-poison
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c0734ed0ef56..b15a1a325bd0 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
- tm-signal-context-force-tm
+ tm-signal-context-force-tm tm-poison
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
new file mode 100644
index 000000000000..977558497c16
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp.
+ *
+ * This test will spawn two processes. Both will be attached to the same
+ * CPU (CPU 0). The child will be in a loop writing to FP register f31 and
+ * VMX/VEC/Altivec register vr31 a known value, called poison, calling
+ * sched_yield syscall after to allow the parent to switch on the CPU.
+ * Parent will set f31 and vr31 to 1 and in a loop will check if f31 and
+ * vr31 remain 1 as expected until a given timeout (2m). If the issue is
+ * present child's poison will leak into parent's f31 or vr31 registers,
+ * otherwise, poison will never leak into parent's f31 and vr31 registers.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <inttypes.h>
+
+#include "tm.h"
+
+/*
+ * Check that a poison value written by a sibling process into f31 / vr31
+ * never shows up in this process' f31 / vr31 inside a transaction.
+ *
+ * Parent and child are both bound to CPU 0. The child keeps writing the
+ * poison and yielding; the parent sets the register to 1 and re-reads it
+ * inside transactions for about two minutes per register.
+ *
+ * Returns 0 when no leak was seen, non-zero on a leak or when the test
+ * could not be set up (affinity or fork failure).
+ */
+int tm_poison_test(void)
+{
+	int pid;
+	cpu_set_t cpuset;
+	uint64_t poison = 0xdeadbeefc0dec0fe;
+	uint64_t unknown = 0;
+	bool fail_fp = false;
+	bool fail_vr = false;
+
+	SKIP_IF(!have_htm());
+
+	/* Attach both Child and Parent to CPU 0 */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	/* The test is only meaningful if both processes share the CPU. */
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+		perror("sched_setaffinity");
+		return 1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		/*
+		 * Bail out: kill(-1, SIGKILL) at the end would signal every
+		 * process this user is allowed to signal.
+		 */
+		perror("fork");
+		return 1;
+	}
+	if (!pid) {
+		/**
+		 * child
+		 */
+		while (1) {
+			sched_yield();
+			asm (
+				"mtvsrd 31, %[poison];" // f31 = poison
+				"mtvsrd 63, %[poison];" // vr31 = poison
+
+				: : [poison] "r" (poison) : );
+		}
+	}
+
+	/**
+	 * parent
+	 */
+	asm (
+		/*
+		 * Set r3, r4, and f31 to known value 1 before entering
+		 * in transaction. They won't be written after that.
+		 */
+		" li 3, 0x1 ;"
+		" li 4, 0x1 ;"
+		" mtvsrd 31, 4 ;"
+
+		/*
+		 * The Time Base (TB) is a 64-bit counter register that is
+		 * independent of the CPU clock and which is incremented
+		 * at a frequency of 512000000 Hz, so every 1.953125ns.
+		 * A 2 minute timeout therefore needs
+		 * 120s / 0.000000001953125s = 61440000000 increments.
+		 * Below we set that value in r5 and then use r6 to track
+		 * the initial TB value, reading the current TB into r7 at
+		 * every iteration and comparing it to r6. When
+		 * r7 (current) - r6 (initial) > 61440000000 we bail out,
+		 * since we have spent 2 minutes in the loop.
+		 * SPR 268 is the TB register.
+		 */
+		" lis 5, 14 ;"
+		" ori 5, 5, 19996 ;"
+		" sldi 5, 5, 16 ;" // r5 = 61440000000
+
+		" mfspr 6, 268 ;" // r6 (TB initial)
+		"1: mfspr 7, 268 ;" // r7 (TB current)
+		" subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
+		" cmpd 7, 5 ;"
+		" bgt 3f ;" // yes, exit
+
+		/*
+		 * Main loop to check f31
+		 */
+		" tbegin. ;" // no, try again
+		" beq 1b ;" // restart if no timeout
+		" mfvsrd 3, 31 ;" // read f31
+		" cmpd 3, 4 ;" // f31 == 1 ?
+		" bne 2f ;" // broken :-(
+		" tabort. 3 ;" // try another transaction
+		"2: tend. ;" // commit transaction
+		"3: mr %[unknown], 3 ;" // record r3
+
+		: [unknown] "=r" (unknown)
+		:
+		: "cr0", "r3", "r4", "r5", "r6", "r7", "vs31"
+
+		);
+
+	/*
+	 * On leak 'unknown' will contain 'poison' value from child,
+	 * otherwise (no leak) 'unknown' will contain the same value
+	 * as r3 before entering in transactional mode, i.e. 0x1.
+	 */
+	fail_fp = unknown != 0x1;
+	if (fail_fp)
+		printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown);
+	else
+		printf("Good, no poison or leaked value into FP registers\n");
+
+	asm (
+		/*
+		 * Set r3, r4, and vr31 to known value 1 before entering
+		 * in transaction. They won't be written after that.
+		 */
+		" li 3, 0x1 ;"
+		" li 4, 0x1 ;"
+		" mtvsrd 63, 4 ;"
+
+		/* Same 2 minute Time Base budget as for the f31 check. */
+		" lis 5, 14 ;"
+		" ori 5, 5, 19996 ;"
+		" sldi 5, 5, 16 ;" // r5 = 61440000000
+
+		" mfspr 6, 268 ;" // r6 (TB initial)
+		"1: mfspr 7, 268 ;" // r7 (TB current)
+		" subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
+		" cmpd 7, 5 ;"
+		" bgt 3f ;" // yes, exit
+
+		/*
+		 * Main loop to check vr31
+		 */
+		" tbegin. ;" // no, try again
+		" beq 1b ;" // restart if no timeout
+		" mfvsrd 3, 63 ;" // read vr31
+		" cmpd 3, 4 ;" // vr31 == 1 ?
+		" bne 2f ;" // broken :-(
+		" tabort. 3 ;" // try another transaction
+		"2: tend. ;" // commit transaction
+		"3: mr %[unknown], 3 ;" // record r3
+
+		: [unknown] "=r" (unknown)
+		:
+		: "cr0", "r3", "r4", "r5", "r6", "r7", "vs63"
+
+		);
+
+	/*
+	 * On leak 'unknown' will contain 'poison' value from child,
+	 * otherwise (no leak) 'unknown' will contain the same value
+	 * as r3 before entering in transactional mode, i.e. 0x1.
+	 */
+	fail_vr = unknown != 0x1;
+	if (fail_vr)
+		printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown);
+	else
+		printf("Good, no poison or leaked value into VEC registers\n");
+
+	/* pid is a real child here: fork() failure returned early above. */
+	kill(pid, SIGKILL);
+
+	return (fail_fp | fail_vr);
+}
+
+/*
+ * Entry point: runs tm_poison_test under the selftest harness and
+ * returns whatever the harness reports.
+ */
+int main(int argc, char *argv[])
+{
+	int rc;
+
+	/* The test completes in about 4m, so allow the harness 250 seconds. */
+	test_harness_set_timeout(250);
+	rc = test_harness(tm_poison_test, "tm_poison_test");
+
+	return rc;
+}
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 05c71ef42f51..c9449aaf438d 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -38,7 +38,6 @@ header-test- += linux/ivtv.h
header-test- += linux/jffs2.h
header-test- += linux/kexec.h
header-test- += linux/matroxfb.h
-header-test- += linux/netfilter_bridge/ebtables.h
header-test- += linux/netfilter_ipv4/ipt_LOG.h
header-test- += linux/netfilter_ipv6/ip6t_LOG.h
header-test- += linux/nfc.h