diff options
Diffstat (limited to 'arch/arm64')
38 files changed, 790 insertions, 656 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1494d5cfd681..d0a53cc6293a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -86,6 +86,7 @@ config ARM64 select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP + select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -278,13 +279,6 @@ config ARCH_SUPPORTS_UPROBES config ARCH_PROC_KCORE_TEXT def_bool y -config MULTI_IRQ_HANDLER - def_bool y - -source "init/Kconfig" - -source "kernel/Kconfig.freezer" - source "arch/arm64/Kconfig.platforms" menu "Bus support" @@ -770,7 +764,6 @@ config HOLES_IN_ZONE def_bool y depends on NUMA -source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC @@ -808,8 +801,6 @@ config ARCH_WANT_HUGE_PMD_SHARE config ARCH_HAS_CACHE_LINE_SIZE def_bool y -source "mm/Kconfig" - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- @@ -1291,10 +1282,6 @@ config DMI endmenu -menu "Userspace binary formats" - -source "fs/Kconfig.binfmt" - config COMPAT bool "Kernel support for 32-bit EL0" depends on ARM64_4K_PAGES || EXPERT @@ -1318,8 +1305,6 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -endmenu - menu "Power management options" source "kernel/power/Kconfig" @@ -1345,25 +1330,12 @@ source "drivers/cpufreq/Kconfig" endmenu -source "net/Kconfig" - -source "drivers/Kconfig" - source "drivers/firmware/Kconfig" source "drivers/acpi/Kconfig" -source "fs/Kconfig" - source "arch/arm64/kvm/Kconfig" -source "arch/arm64/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" if CRYPTO source "arch/arm64/crypto/Kconfig" endif - -source "lib/Kconfig" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cc6bd559af85..69c9170bdd24 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -1,6 +1,3 @@ -menu "Kernel hacking" - -source "lib/Kconfig.debug" config ARM64_PTDUMP_CORE def_bool n @@ -97,5 +94,3 @@ config ARM64_RELOC_TEST tristate "Relocation testing module" source "drivers/hwtracing/coresight/Kconfig" - -endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 99a1d859b8b8..efe61a2e4b5e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 4057197048dc..1a406a76c86a 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -482,9 +482,9 @@ status = "disabled"; }; - mdio_mux_iproc: mdio-mux@6602023c { + mdio_mux_iproc: mdio-mux@66020000 { compatible = "brcm,mdio-mux-iproc"; - reg = <0x6602023c 0x14>; + reg = <0x66020000 0x250>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index b203152ad67c..a70e8ddbd66f 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -278,9 +278,9 @@ #include "stingray-pinctrl.dtsi" - mdio_mux_iproc: mdio-mux@2023c { + mdio_mux_iproc: mdio-mux@20000 { compatible = "brcm,mdio-mux-iproc"; - reg = <0x0002023c 0x14>; + reg = <0x00020000 0x250>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi index 4dd06767f839..4664c33e0763 100644 --- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi +++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi @@ -11,13 +11,14 @@ fman0: fman@1a00000 { #size-cells = <1>; cell-index = <0>; compatible = "fsl,fman"; - ranges = <0x0 0x0 0x1a00000 0x100000>; - reg = <0x0 0x1a00000 0x0 0x100000>; + ranges = <0x0 0x0 0x1a00000 0xfe000>; + reg = <0x0 0x1a00000 0x0 0xfe000>; interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clockgen 3 0>; clock-names = "fmanclk"; fsl,qman-channel-range = <0x800 0x10>; + ptimer-handle = <&ptp_timer0>; muram@0 { compatible = "fsl,fman-muram"; @@ -73,9 +74,11 @@ fman0: fman@1a00000 { compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; }; +}; - ptp_timer0: ptp-timer@fe000 { - compatible = "fsl,fman-ptp-timer"; - reg = <0xfe000 0x1000>; - }; +ptp_timer0: ptp-timer@1afe000 { + compatible = "fsl,fman-ptp-timer"; + reg = <0x0 0x1afe000 0x0 0x1000>; + interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 3 0>; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index 8d477dcbfa58..851190a719ea 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -1000,6 +1000,24 @@ reset-gpios = <&gpio11 1 0 >; }; + /* UFS */ + ufs: ufs@ff3b0000 { + compatible = "hisilicon,hi3660-ufs", "jedec,ufs-1.1"; + /* 0: HCI standard */ + /* 1: UFS SYS CTRL */ + reg = <0x0 0xff3b0000 0x0 0x1000>, + <0x0 0xff3b1000 0x0 0x1000>; + interrupt-parent = <&gic>; + interrupts = <GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&crg_ctrl HI3660_CLK_GATE_UFSIO_REF>, + <&crg_ctrl HI3660_CLK_GATE_UFSPHY_CFG>; + clock-names = "ref_clk", "phy_clk"; + freq-table-hz = <0 0>, <0 0>; + /* offset: 0x84; bit: 12 */ + resets = <&crg_rst 0x84 12>; + reset-names = "rst"; + }; + /* SD */ dwmmc1: dwmmc1@ff37f000 { #address-cells = <1>; diff --git a/arch/arm64/boot/dts/hisilicon/hip07.dtsi b/arch/arm64/boot/dts/hisilicon/hip07.dtsi index 9c10030a07f8..c33adefc3061 100644 --- a/arch/arm64/boot/dts/hisilicon/hip07.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip07.dtsi @@ -1049,7 +1049,74 @@ num-pins = <2>; }; }; + p0_mbigen_alg_a:interrupt-controller@d0080000 { + compatible = "hisilicon,mbigen-v2"; + reg = <0x0 0xd0080000 0x0 0x10000>; + p0_mbigen_sec_a: intc_sec { + msi-parent = <&p0_its_dsa_a 0x40400>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <33>; + }; + p0_mbigen_smmu_alg_a: intc_smmu_alg { + msi-parent = <&p0_its_dsa_a 0x40b1b>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <3>; + }; + }; + p0_mbigen_alg_b:interrupt-controller@8,d0080000 { + compatible = "hisilicon,mbigen-v2"; + reg = <0x8 0xd0080000 0x0 0x10000>; + + p0_mbigen_sec_b: intc_sec { + msi-parent = <&p0_its_dsa_b 0x42400>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <33>; + }; + p0_mbigen_smmu_alg_b: intc_smmu_alg { + msi-parent = <&p0_its_dsa_b 0x42b1b>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <3>; + }; + }; + p1_mbigen_alg_a:interrupt-controller@400,d0080000 { + compatible = "hisilicon,mbigen-v2"; + reg = <0x400 0xd0080000 0x0 0x10000>; + + p1_mbigen_sec_a: intc_sec { + msi-parent = <&p1_its_dsa_a 0x44400>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <33>; + }; + p1_mbigen_smmu_alg_a: intc_smmu_alg { + msi-parent = <&p1_its_dsa_a 0x44b1b>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <3>; + }; + }; + p1_mbigen_alg_b:interrupt-controller@408,d0080000 { + compatible = "hisilicon,mbigen-v2"; + reg = <0x408 0xd0080000 0x0 0x10000>; + + p1_mbigen_sec_b: intc_sec { + msi-parent = <&p1_its_dsa_b 0x46400>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <33>; + }; + p1_mbigen_smmu_alg_b: intc_smmu_alg { + msi-parent = <&p1_its_dsa_b 0x46b1b>; + interrupt-controller; + #interrupt-cells = <2>; + num-pins = <3>; + }; + }; p0_mbigen_dsa_a: interrupt-controller@c0080000 { compatible = "hisilicon,mbigen-v2"; reg = <0x0 0xc0080000 0x0 0x10000>; @@ -1107,6 +1174,58 @@ hisilicon,broken-prefetch-cmd; status = "disabled"; }; + p0_smmu_alg_a: smmu_alg@d0040000 { + compatible = "arm,smmu-v3"; + reg = <0x0 0xd0040000 0x0 0x20000>; + interrupt-parent = <&p0_mbigen_smmu_alg_a>; + interrupts = <733 1>, + <734 1>, + <735 1>; + interrupt-names = "eventq", "gerror", "priq"; + #iommu-cells = <1>; + dma-coherent; + hisilicon,broken-prefetch-cmd; + /* smmu-cb-memtype = <0x0 0x1>;*/ + }; + p0_smmu_alg_b: smmu_alg@8,d0040000 { + compatible = "arm,smmu-v3"; + reg = <0x8 0xd0040000 0x0 0x20000>; + interrupt-parent = <&p0_mbigen_smmu_alg_b>; + interrupts = <733 1>, + <734 1>, + <735 1>; + interrupt-names = "eventq", "gerror", "priq"; + #iommu-cells = <1>; + dma-coherent; + hisilicon,broken-prefetch-cmd; + /* smmu-cb-memtype = <0x0 0x1>;*/ + }; + p1_smmu_alg_a: smmu_alg@400,d0040000 { + compatible = "arm,smmu-v3"; + reg = <0x400 0xd0040000 0x0 0x20000>; + interrupt-parent = <&p1_mbigen_smmu_alg_a>; + interrupts = <733 1>, + <734 1>, + <735 1>; + interrupt-names = "eventq", "gerror", "priq"; + #iommu-cells = <1>; + dma-coherent; + hisilicon,broken-prefetch-cmd; + /* smmu-cb-memtype = <0x0 0x1>;*/ + }; + p1_smmu_alg_b: smmu_alg@408,d0040000 { + compatible = "arm,smmu-v3"; + reg = <0x408 0xd0040000 0x0 0x20000>; + interrupt-parent = <&p1_mbigen_smmu_alg_b>; + interrupts = <733 1>, + <734 1>, + <735 1>; + interrupt-names = "eventq", "gerror", "priq"; + #iommu-cells = <1>; + dma-coherent; + hisilicon,broken-prefetch-cmd; + /* smmu-cb-memtype = <0x0 0x1>;*/ + }; soc { compatible = "simple-bus"; @@ -1603,5 +1722,170 @@ 0x0 0 0 4 &mbigen_pcie2_a 671 4>; status = "disabled"; }; + p0_sec_a: crypto@d2000000 { + compatible = "hisilicon,hip07-sec"; + reg = <0x0 0xd0000000 0x0 0x10000 + 0x0 0xd2000000 0x0 0x10000 + 0x0 0xd2010000 0x0 0x10000 + 0x0 0xd2020000 0x0 0x10000 + 0x0 0xd2030000 0x0 0x10000 + 0x0 0xd2040000 0x0 0x10000 + 0x0 0xd2050000 0x0 0x10000 + 0x0 0xd2060000 0x0 0x10000 + 0x0 0xd2070000 0x0 0x10000 + 0x0 0xd2080000 0x0 0x10000 + 0x0 0xd2090000 0x0 0x10000 + 0x0 0xd20a0000 0x0 0x10000 + 0x0 0xd20b0000 0x0 0x10000 + 0x0 0xd20c0000 0x0 0x10000 + 0x0 0xd20d0000 0x0 0x10000 + 0x0 0xd20e0000 0x0 0x10000 + 0x0 0xd20f0000 0x0 0x10000 + 0x0 0xd2100000 0x0 0x10000>; + interrupt-parent = <&p0_mbigen_sec_a>; + iommus = <&p0_smmu_alg_a 0x600>; + dma-coherent; + interrupts = <576 4>, + <577 1>, <578 4>, + <579 1>, <580 4>, + <581 1>, <582 4>, + <583 1>, <584 4>, + <585 1>, <586 4>, + <587 1>, <588 4>, + <589 1>, <590 4>, + <591 1>, <592 4>, + <593 1>, <594 4>, + <595 1>, <596 4>, + <597 1>, <598 4>, + <599 1>, <600 4>, + <601 1>, <602 4>, + <603 1>, <604 4>, + <605 1>, <606 4>, + <607 1>, <608 4>; + }; + p0_sec_b: crypto@8,d2000000 { + compatible = "hisilicon,hip07-sec"; + reg = <0x8 0xd0000000 0x0 0x10000 + 0x8 0xd2000000 0x0 0x10000 + 0x8 0xd2010000 0x0 0x10000 + 0x8 0xd2020000 0x0 0x10000 + 0x8 0xd2030000 0x0 0x10000 + 0x8 0xd2040000 0x0 0x10000 + 0x8 0xd2050000 0x0 0x10000 + 0x8 0xd2060000 0x0 0x10000 + 0x8 0xd2070000 0x0 0x10000 + 0x8 0xd2080000 0x0 0x10000 + 0x8 0xd2090000 0x0 0x10000 + 0x8 0xd20a0000 0x0 0x10000 + 0x8 0xd20b0000 0x0 0x10000 + 0x8 0xd20c0000 0x0 0x10000 + 0x8 0xd20d0000 0x0 0x10000 + 0x8 0xd20e0000 0x0 0x10000 + 0x8 0xd20f0000 0x0 0x10000 + 0x8 0xd2100000 0x0 0x10000>; + interrupt-parent = <&p0_mbigen_sec_b>; + iommus = <&p0_smmu_alg_b 0x600>; + dma-coherent; + interrupts = <576 4>, + <577 1>, <578 4>, + <579 1>, <580 4>, + <581 1>, <582 4>, + <583 1>, <584 4>, + <585 1>, <586 4>, + <587 1>, <588 4>, + <589 1>, <590 4>, + <591 1>, <592 4>, + <593 1>, <594 4>, + <595 1>, <596 4>, + <597 1>, <598 4>, + <599 1>, <600 4>, + <601 1>, <602 4>, + <603 1>, <604 4>, + <605 1>, <606 4>, + <607 1>, <608 4>; + }; + p1_sec_a: crypto@400,d2000000 { + compatible = "hisilicon,hip07-sec"; + reg = <0x400 0xd0000000 0x0 0x10000 + 0x400 0xd2000000 0x0 0x10000 + 0x400 0xd2010000 0x0 0x10000 + 0x400 0xd2020000 0x0 0x10000 + 0x400 0xd2030000 0x0 0x10000 + 0x400 0xd2040000 0x0 0x10000 + 0x400 0xd2050000 0x0 0x10000 + 0x400 0xd2060000 0x0 0x10000 + 0x400 0xd2070000 0x0 0x10000 + 0x400 0xd2080000 0x0 0x10000 + 0x400 0xd2090000 0x0 0x10000 + 0x400 0xd20a0000 0x0 0x10000 + 0x400 0xd20b0000 0x0 0x10000 + 0x400 0xd20c0000 0x0 0x10000 + 0x400 0xd20d0000 0x0 0x10000 + 0x400 0xd20e0000 0x0 0x10000 + 0x400 0xd20f0000 0x0 0x10000 + 0x400 0xd2100000 0x0 0x10000>; + interrupt-parent = <&p1_mbigen_sec_a>; + iommus = <&p1_smmu_alg_a 0x600>; + dma-coherent; + interrupts = <576 4>, + <577 1>, <578 4>, + <579 1>, <580 4>, + <581 1>, <582 4>, + <583 1>, <584 4>, + <585 1>, <586 4>, + <587 1>, <588 4>, + <589 1>, <590 4>, + <591 1>, <592 4>, + <593 1>, <594 4>, + <595 1>, <596 4>, + <597 1>, <598 4>, + <599 1>, <600 4>, + <601 1>, <602 4>, + <603 1>, <604 4>, + <605 1>, <606 4>, + <607 1>, <608 4>; + }; + p1_sec_b: crypto@408,d2000000 { + compatible = "hisilicon,hip07-sec"; + reg = <0x408 0xd0000000 0x0 0x10000 + 0x408 0xd2000000 0x0 0x10000 + 0x408 0xd2010000 0x0 0x10000 + 0x408 0xd2020000 0x0 0x10000 + 0x408 0xd2030000 0x0 0x10000 + 0x408 0xd2040000 0x0 0x10000 + 0x408 0xd2050000 0x0 0x10000 + 0x408 0xd2060000 0x0 0x10000 + 0x408 0xd2070000 0x0 0x10000 + 0x408 0xd2080000 0x0 0x10000 + 0x408 0xd2090000 0x0 0x10000 + 0x408 0xd20a0000 0x0 0x10000 + 0x408 0xd20b0000 0x0 0x10000 + 0x408 0xd20c0000 0x0 0x10000 + 0x408 0xd20d0000 0x0 0x10000 + 0x408 0xd20e0000 0x0 0x10000 + 0x408 0xd20f0000 0x0 0x10000 + 0x408 0xd2100000 0x0 0x10000>; + interrupt-parent = <&p1_mbigen_sec_b>; + iommus = <&p1_smmu_alg_b 0x600>; + dma-coherent; + interrupts = <576 4>, + <577 1>, <578 4>, + <579 1>, <580 4>, + <581 1>, <582 4>, + <583 1>, <584 4>, + <585 1>, <586 4>, + <587 1>, <588 4>, + <589 1>, <590 4>, + <591 1>, <592 4>, + <593 1>, <594 4>, + <595 1>, <596 4>, + <597 1>, <598 4>, + <599 1>, <600 4>, + <601 1>, <602 4>, + <603 1>, <604 4>, + <605 1>, <606 4>, + <607 1>, <608 4>; + }; + }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f9a186f6af8a..2c07e233012b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -193,6 +193,7 @@ CONFIG_SCSI_HISI_SAS=y CONFIG_SCSI_HISI_SAS_PCI=y CONFIG_SCSI_UFSHCD=m CONFIG_SCSI_UFSHCD_PLATFORM=m +CONFIG_SCSI_UFS_HISI=y CONFIG_SCSI_UFS_QCOM=m CONFIG_ATA=y CONFIG_SATA_AHCI=y diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 88f5aef7934c..e3a375c4cb83 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -19,33 +19,24 @@ * u32 *macp, u8 const rk[], u32 rounds); */ ENTRY(ce_aes_ccm_auth_data) - frame_push 7 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - - ldr w25, [x22] /* leftover from prev round? */ + ldr w8, [x3] /* leftover from prev round? */ ld1 {v0.16b}, [x0] /* load mac */ - cbz w25, 1f - sub w25, w25, #16 + cbz w8, 1f + sub w8, w8, #16 eor v1.16b, v1.16b, v1.16b -0: ldrb w7, [x20], #1 /* get 1 byte of input */ - subs w21, w21, #1 - add w25, w25, #1 +0: ldrb w7, [x1], #1 /* get 1 byte of input */ + subs w2, w2, #1 + add w8, w8, #1 ins v1.b[0], w7 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ beq 8f /* out of input? */ - cbnz w25, 0b + cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.4s}, [x23] /* load first round key */ - prfm pldl1strm, [x20] - cmp w24, #12 /* which key size? */ - add x6, x23, #16 - sub w7, w24, #2 /* modified # of rounds */ +1: ld1 {v3.4s}, [x4] /* load first round key */ + prfm pldl1strm, [x1] + cmp w5, #12 /* which key size? */ + add x6, x4, #16 + sub w7, w5, #2 /* modified # of rounds */ bmi 2f bne 5f mov v5.16b, v3.16b @@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v5.4s}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b - subs w21, w21, #16 /* last data? */ + subs w2, w2, #16 /* last data? */ eor v0.16b, v0.16b, v5.16b /* final round */ bmi 6f - ld1 {v1.16b}, [x20], #16 /* load next input block */ + ld1 {v1.16b}, [x1], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ - beq 6f - - if_will_cond_yield_neon - st1 {v0.16b}, [x19] /* store mac */ - do_cond_yield_neon - ld1 {v0.16b}, [x19] /* reload mac */ - endif_yield_neon - - b 1b -6: st1 {v0.16b}, [x19] /* store mac */ + bne 1b +6: st1 {v0.16b}, [x0] /* store mac */ beq 10f - adds w21, w21, #16 + adds w2, w2, #16 beq 10f - mov w25, w21 -7: ldrb w7, [x20], #1 + mov w8, w2 +7: ldrb w7, [x1], #1 umov w6, v0.b[0] eor w6, w6, w7 - strb w6, [x19], #1 - subs w21, w21, #1 + strb w6, [x0], #1 + subs w2, w2, #1 beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: mov w7, w25 - add w25, w25, #16 +8: mov w7, w8 + add w8, w8, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.16b}, [x19] -10: str w25, [x22] - - frame_pop + st1 {v0.16b}, [x0] +10: str w8, [x3] ret ENDPROC(ce_aes_ccm_auth_data) @@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final) ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc - frame_push 8 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 - - ldr x26, [x25, #8] /* load lower ctr */ - ld1 {v0.16b}, [x24] /* load mac */ -CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ + ldr x8, [x6, #8] /* load lower ctr */ + ld1 {v0.16b}, [x5] /* load mac */ +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.8b}, [x25] /* load upper ctr */ - prfm pldl1strm, [x20] - add x26, x26, #1 - rev x9, x26 - cmp w23, #12 /* which key size? */ - sub w7, w23, #2 /* get modified # of rounds */ + ld1 {v1.8b}, [x6] /* load upper ctr */ + prfm pldl1strm, [x1] + add x8, x8, #1 + rev x9, x8 + cmp w4, #12 /* which key size? */ + sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.4s}, [x22] /* load first round key */ - add x10, x22, #16 + ld1 {v3.4s}, [x3] /* load first round key */ + add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b @@ -194,9 +165,9 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b - subs w21, w21, #16 - bmi 7f /* partial block? */ - ld1 {v2.16b}, [x20], #16 /* load next input block */ + subs w2, w2, #16 + bmi 6f /* partial block? */ + ld1 {v2.16b}, [x1], #16 /* load next input block */ .if \enc == 1 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ @@ -205,29 +176,18 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ eor v1.16b, v2.16b, v5.16b /* final round enc */ .endif eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ - st1 {v1.16b}, [x19], #16 /* write output block */ - beq 5f - - if_will_cond_yield_neon - st1 {v0.16b}, [x24] /* store mac */ - do_cond_yield_neon - ld1 {v0.16b}, [x24] /* reload mac */ - endif_yield_neon - - b 0b -5: -CPU_LE( rev x26, x26 ) - st1 {v0.16b}, [x24] /* store mac */ - str x26, [x25, #8] /* store lsb end of ctr (BE) */ - -6: frame_pop - ret - -7: eor v0.16b, v0.16b, v5.16b /* final round mac */ + st1 {v1.16b}, [x0], #16 /* write output block */ + bne 0b +CPU_LE( rev x8, x8 ) + st1 {v0.16b}, [x5] /* store mac */ + str x8, [x6, #8] /* store lsb end of ctr (BE) */ +5: ret + +6: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.16b}, [x24] /* store mac */ - add w21, w21, #16 /* process partial tail block */ -8: ldrb w9, [x20], #1 /* get 1 byte of input */ + st1 {v0.16b}, [x5] /* store mac */ + add w2, w2, #16 /* process partial tail block */ +7: ldrb w9, [x1], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ umov w7, v0.b[0] /* get top mac byte */ .if \enc == 1 @@ -237,13 +197,13 @@ CPU_LE( rev x26, x26 ) eor w9, w9, w6 eor w7, w7, w9 .endif - strb w9, [x19], #1 /* store out byte */ - strb w7, [x24], #1 /* store mac byte */ - subs w21, w21, #1 - beq 6b + strb w9, [x0], #1 /* store out byte */ + strb w7, [x5], #1 /* store mac byte */ + subs w2, w2, #1 + beq 5b ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ - b 8b + b 7b .endm /* diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index e3e50950a863..adcb83eb683c 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -567,7 +567,6 @@ static struct shash_alg mac_algs[] = { { .base.cra_name = "cmac(aes)", .base.cra_driver_name = "cmac-aes-" MODE, .base.cra_priority = PRIO, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) + 2 * AES_BLOCK_SIZE, @@ -583,7 +582,6 @@ static struct shash_alg mac_algs[] = { { .base.cra_name = "xcbc(aes)", .base.cra_driver_name = "xcbc-aes-" MODE, .base.cra_priority = PRIO, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) + 2 * AES_BLOCK_SIZE, @@ -599,7 +597,6 @@ static struct shash_alg mac_algs[] = { { .base.cra_name = "cbcmac(aes)", .base.cra_driver_name = "cbcmac-aes-" MODE, .base.cra_priority = PRIO, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx), .base.cra_module = THIS_MODULE, diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index dcffb9e77589..1b319b716d5e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 PMULL instructions. * - * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -46,6 +46,19 @@ ss3 .req v26 ss4 .req v27 + XL2 .req v8 + XM2 .req v9 + XH2 .req v10 + XL3 .req v11 + XM3 .req v12 + XH3 .req v13 + TT3 .req v14 + TT4 .req v15 + HH .req v16 + HH3 .req v17 + HH4 .req v18 + HH34 .req v19 + .text .arch armv8-a+crypto @@ -134,11 +147,25 @@ .endm .macro __pmull_pre_p64 + add x8, x3, #16 + ld1 {HH.2d-HH4.2d}, [x8] + + trn1 SHASH2.2d, SHASH.2d, HH.2d + trn2 T1.2d, SHASH.2d, HH.2d + eor SHASH2.16b, SHASH2.16b, T1.16b + + trn1 HH34.2d, HH3.2d, HH4.2d + trn2 T1.2d, HH3.2d, HH4.2d + eor HH34.16b, HH34.16b, T1.16b + movi MASK.16b, #0xe1 shl MASK.2d, MASK.2d, #57 .endm .macro __pmull_pre_p8 + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 + eor SHASH2.16b, SHASH2.16b, SHASH.16b + // k00_16 := 0x0000000000000000_000000000000ffff // k32_48 := 0x00000000ffffffff_0000ffffffffffff movi k32_48.2d, #0xffffffff @@ -213,31 +240,88 @@ .endm .macro __pmull_ghash, pn - frame_push 5 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - -0: ld1 {SHASH.2d}, [x22] - ld1 {XL.2d}, [x20] - ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 - eor SHASH2.16b, SHASH2.16b, SHASH.16b + ld1 {SHASH.2d}, [x3] + ld1 {XL.2d}, [x1] __pmull_pre_\pn /* do the head block first, if supplied */ - cbz x23, 1f - ld1 {T1.2d}, [x23] - mov x23, xzr - b 2f + cbz x4, 0f + ld1 {T1.2d}, [x4] + mov x4, xzr + b 3f + +0: .ifc \pn, p64 + tbnz w0, #0, 2f // skip until #blocks is a + tbnz w0, #1, 2f // round multiple of 4 + +1: ld1 {XM3.16b-TT4.16b}, [x2], #64 + + sub w0, w0, #4 + + rev64 T1.16b, XM3.16b + rev64 T2.16b, XH3.16b + rev64 TT4.16b, TT4.16b + rev64 TT3.16b, TT3.16b + + ext IN1.16b, TT4.16b, TT4.16b, #8 + ext XL3.16b, TT3.16b, TT3.16b, #8 + + eor TT4.16b, TT4.16b, IN1.16b + pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 + pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 + pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0) + + eor TT3.16b, TT3.16b, XL3.16b + pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1 + pmull XL3.1q, HH.1d, XL3.1d // a0 * b0 + pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0) + + ext IN1.16b, T2.16b, T2.16b, #8 + eor XL2.16b, XL2.16b, XL3.16b + eor XH2.16b, XH2.16b, XH3.16b + eor XM2.16b, XM2.16b, XM3.16b + + eor T2.16b, T2.16b, IN1.16b + pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1 + pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0 + pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0) -1: ld1 {T1.2d}, [x21], #16 - sub w19, w19, #1 + eor XL2.16b, XL2.16b, XL3.16b + eor XH2.16b, XH2.16b, XH3.16b + eor XM2.16b, XM2.16b, XM3.16b -2: /* multiply XL by SHASH in GF(2^128) */ + ext IN1.16b, T1.16b, T1.16b, #8 + ext TT3.16b, XL.16b, XL.16b, #8 + eor XL.16b, XL.16b, IN1.16b + eor T1.16b, T1.16b, TT3.16b + + pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1 + eor T1.16b, T1.16b, XL.16b + pmull XL.1q, HH4.1d, XL.1d // a0 * b0 + pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0) + + eor XL.16b, XL.16b, XL2.16b + eor XH.16b, XH.16b, XH2.16b + eor XM.16b, XM.16b, XM2.16b + + eor T2.16b, XL.16b, XH.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor XM.16b, XM.16b, T2.16b + + __pmull_reduce_p64 + + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b + + cbz w0, 5f + b 1b + .endif + +2: ld1 {T1.2d}, [x2], #16 + sub w0, w0, #1 + +3: /* multiply XL by SHASH in GF(2^128) */ CPU_LE( rev64 T1.16b, T1.16b ) ext T2.16b, XL.16b, XL.16b, #8 @@ -250,7 +334,7 @@ CPU_LE( rev64 T1.16b, T1.16b ) __pmull_\pn XL, XL, SHASH // a0 * b0 __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0) - eor T2.16b, XL.16b, XH.16b +4: eor T2.16b, XL.16b, XH.16b ext T1.16b, XL.16b, XH.16b, #8 eor XM.16b, XM.16b, T2.16b @@ -259,18 +343,9 @@ CPU_LE( rev64 T1.16b, T1.16b ) eor T2.16b, T2.16b, XH.16b eor XL.16b, XL.16b, T2.16b - cbz w19, 3f + cbnz w0, 0b - if_will_cond_yield_neon - st1 {XL.2d}, [x20] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - -3: st1 {XL.2d}, [x20] - frame_pop +5: st1 {XL.2d}, [x1] ret .endm @@ -286,9 +361,10 @@ ENTRY(pmull_ghash_update_p8) __pmull_ghash p8 ENDPROC(pmull_ghash_update_p8) - KS .req v8 - CTR .req v9 - INP .req v10 + KS0 .req v12 + KS1 .req v13 + INP0 .req v14 + INP1 .req v15 .macro load_round_keys, rounds, rk cmp \rounds, #12 @@ -322,142 +398,153 @@ ENDPROC(pmull_ghash_update_p8) .endm .macro pmull_gcm_do_crypt, enc - frame_push 10 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 - mov x26, x7 - .if \enc == 1 - ldr x27, [sp, #96] // first stacked arg - .endif - - ldr x28, [x24, #8] // load lower counter -CPU_LE( rev x28, x28 ) - -0: mov x0, x25 - load_round_keys w26, x0 - ld1 {SHASH.2d}, [x23] - ld1 {XL.2d}, [x20] + ld1 {SHASH.2d}, [x4], #16 + ld1 {HH.2d}, [x4] + ld1 {XL.2d}, [x1] + ldr x8, [x5, #8] // load lower counter movi MASK.16b, #0xe1 - ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 + trn1 SHASH2.2d, SHASH.2d, HH.2d + trn2 T1.2d, SHASH.2d, HH.2d +CPU_LE( rev x8, x8 ) shl MASK.2d, MASK.2d, #57 - eor SHASH2.16b, SHASH2.16b, SHASH.16b + eor SHASH2.16b, SHASH2.16b, T1.16b .if \enc == 1 - ld1 {KS.16b}, [x27] + ldr x10, [sp] + ld1 {KS0.16b-KS1.16b}, [x10] .endif -1: ld1 {CTR.8b}, [x24] // load upper counter - ld1 {INP.16b}, [x22], #16 - rev x9, x28 - add x28, x28, #1 - sub w19, w19, #1 - ins CTR.d[1], x9 // set lower counter + cbnz x6, 4f + +0: ld1 {INP0.16b-INP1.16b}, [x3], #32 + + rev x9, x8 + add x11, x8, #1 + add x8, x8, #2 .if \enc == 1 - eor INP.16b, INP.16b, KS.16b // encrypt input - st1 {INP.16b}, [x21], #16 + eor INP0.16b, INP0.16b, KS0.16b // encrypt input + eor INP1.16b, INP1.16b, KS1.16b .endif - rev64 T1.16b, INP.16b + ld1 {KS0.8b}, [x5] // load upper counter + rev x11, x11 + sub w0, w0, #2 + mov KS1.8b, KS0.8b + ins KS0.d[1], x9 // set lower counter + ins KS1.d[1], x11 - cmp w26, #12 - b.ge 4f // AES-192/256? + rev64 T1.16b, INP1.16b -2: enc_round CTR, v21 + cmp w7, #12 + b.ge 2f // AES-192/256? - ext T2.16b, XL.16b, XL.16b, #8 +1: enc_round KS0, v21 ext IN1.16b, T1.16b, T1.16b, #8 - enc_round CTR, v22 + enc_round KS1, v21 + pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 + + enc_round KS0, v22 + eor T1.16b, T1.16b, IN1.16b + + enc_round KS1, v22 + pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 + enc_round KS0, v23 + pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) + + enc_round KS1, v23 + rev64 T1.16b, INP0.16b + ext T2.16b, XL.16b, XL.16b, #8 + + enc_round KS0, v24 + ext IN1.16b, T1.16b, T1.16b, #8 eor T1.16b, T1.16b, T2.16b - eor XL.16b, XL.16b, IN1.16b - enc_round CTR, v23 + enc_round KS1, v24 + eor XL.16b, XL.16b, IN1.16b - pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 + enc_round KS0, v25 eor T1.16b, T1.16b, XL.16b - enc_round CTR, v24 + enc_round KS1, v25 + pmull2 XH.1q, HH.2d, XL.2d // a1 * b1 + + enc_round KS0, v26 + pmull XL.1q, HH.1d, XL.1d // a0 * b0 - pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 - pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) + enc_round KS1, v26 + pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0) - enc_round CTR, v25 + enc_round KS0, v27 + eor XL.16b, XL.16b, XL2.16b + eor XH.16b, XH.16b, XH2.16b + enc_round KS1, v27 + eor XM.16b, XM.16b, XM2.16b ext T1.16b, XL.16b, XH.16b, #8 + + enc_round KS0, v28 eor T2.16b, XL.16b, XH.16b eor XM.16b, XM.16b, T1.16b - enc_round CTR, v26 - + enc_round KS1, v28 eor XM.16b, XM.16b, T2.16b - pmull T2.1q, XL.1d, MASK.1d - enc_round CTR, v27 + enc_round KS0, v29 + pmull T2.1q, XL.1d, MASK.1d + enc_round KS1, v29 mov XH.d[0], XM.d[1] mov XM.d[1], XL.d[0] - enc_round CTR, v28 - + aese KS0.16b, v30.16b eor XL.16b, XM.16b, T2.16b - enc_round CTR, v29 - + aese KS1.16b, v30.16b ext T2.16b, XL.16b, XL.16b, #8 - aese CTR.16b, v30.16b - + eor KS0.16b, KS0.16b, v31.16b pmull XL.1q, XL.1d, MASK.1d eor T2.16b, T2.16b, XH.16b - eor KS.16b, CTR.16b, v31.16b - + eor KS1.16b, KS1.16b, v31.16b eor XL.16b, XL.16b, T2.16b .if \enc == 0 - eor INP.16b, INP.16b, KS.16b - st1 {INP.16b}, [x21], #16 + eor INP0.16b, INP0.16b, KS0.16b + eor INP1.16b, INP1.16b, KS1.16b .endif - cbz w19, 3f + st1 {INP0.16b-INP1.16b}, [x2], #32 - if_will_cond_yield_neon - st1 {XL.2d}, [x20] - .if \enc == 1 - st1 {KS.16b}, [x27] - .endif - do_cond_yield_neon - b 0b - endif_yield_neon + cbnz w0, 0b - b 1b +CPU_LE( rev x8, x8 ) + st1 {XL.2d}, [x1] + str x8, [x5, #8] // store lower counter -3: st1 {XL.2d}, [x20] .if \enc == 1 - st1 {KS.16b}, [x27] + st1 {KS0.16b-KS1.16b}, [x10] .endif -CPU_LE( rev x28, x28 ) - str x28, [x24, #8] // store lower counter - - frame_pop ret -4: b.eq 5f // AES-192? - enc_round CTR, v17 - enc_round CTR, v18 -5: enc_round CTR, v19 - enc_round CTR, v20 - b 2b +2: b.eq 3f // AES-192? + enc_round KS0, v17 + enc_round KS1, v17 + enc_round KS0, v18 + enc_round KS1, v18 +3: enc_round KS0, v19 + enc_round KS1, v19 + enc_round KS0, v20 + enc_round KS1, v20 + b 1b + +4: load_round_keys w7, x6 + b 0b .endm /* diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 7cf0b1aa6ea8..6e9f33d14930 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 PMULL instructions. * - * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -33,9 +33,12 @@ MODULE_ALIAS_CRYPTO("ghash"); #define GCM_IV_SIZE 12 struct ghash_key { - u64 a; - u64 b; - be128 k; + u64 h[2]; + u64 h2[2]; + u64 h3[2]; + u64 h4[2]; + + be128 k; }; struct ghash_desc_ctx { @@ -113,6 +116,9 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src, } } +/* avoid hogging the CPU for too long */ +#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE) + static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int len) { @@ -136,11 +142,16 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, blocks = len / GHASH_BLOCK_SIZE; len %= GHASH_BLOCK_SIZE; - ghash_do_update(blocks, ctx->digest, src, key, - partial ? ctx->buf : NULL); + do { + int chunk = min(blocks, MAX_BLOCKS); - src += blocks * GHASH_BLOCK_SIZE; - partial = 0; + ghash_do_update(chunk, ctx->digest, src, key, + partial ? ctx->buf : NULL); + + blocks -= chunk; + src += chunk * GHASH_BLOCK_SIZE; + partial = 0; + } while (unlikely(blocks > 0)); } if (len) memcpy(ctx->buf + partial, src, len); @@ -166,23 +177,36 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) return 0; } +static void ghash_reflect(u64 h[], const be128 *k) +{ + u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0; + + h[0] = (be64_to_cpu(k->b) << 1) | carry; + h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63); + + if (carry) + h[1] ^= 0xc200000000000000UL; +} + static int __ghash_setkey(struct ghash_key *key, const u8 *inkey, unsigned int keylen) { - u64 a, b; + be128 h; /* needed for the fallback */ memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); - /* perform multiplication by 'x' in GF(2^128) */ - b = get_unaligned_be64(inkey); - a = get_unaligned_be64(inkey + 8); + ghash_reflect(key->h, &key->k); - key->a = (a << 1) | (b >> 63); - key->b = (b << 1) | (a >> 63); + h = key->k; + gf128mul_lle(&h, &key->k); + ghash_reflect(key->h2, &h); - if (b >> 63) - key->b ^= 0xc200000000000000UL; + gf128mul_lle(&h, &key->k); + ghash_reflect(key->h3, &h); + + gf128mul_lle(&h, &key->k); + ghash_reflect(key->h4, &h); return 0; } @@ -204,7 +228,6 @@ static struct shash_alg ghash_alg = { .base.cra_name = "ghash", .base.cra_driver_name = "ghash-ce", .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, @@ -245,7 +268,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, __aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){}, num_rounds(&ctx->aes_key)); - return __ghash_setkey(&ctx->ghash_key, key, sizeof(key)); + return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128)); } static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) @@ -349,9 +372,10 @@ static int gcm_encrypt(struct aead_request *req) struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); struct skcipher_walk walk; u8 iv[AES_BLOCK_SIZE]; - u8 ks[AES_BLOCK_SIZE]; + u8 ks[2 * AES_BLOCK_SIZE]; u8 tag[AES_BLOCK_SIZE]; u64 dg[2] = {}; + int nrounds = num_rounds(&ctx->aes_key); int err; if (req->assoclen) @@ -360,39 +384,39 @@ static int gcm_encrypt(struct aead_request *req) memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(1, iv + GCM_IV_SIZE); - if (likely(may_use_simd())) { - kernel_neon_begin(); + err = skcipher_walk_aead_encrypt(&walk, req, false); - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, - num_rounds(&ctx->aes_key)); + if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) { + u32 const *rk = NULL; + + kernel_neon_begin(); + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - pmull_gcm_encrypt_block(ks, iv, NULL, - num_rounds(&ctx->aes_key)); + pmull_gcm_encrypt_block(ks, iv, NULL, nrounds); put_unaligned_be32(3, iv + GCM_IV_SIZE); - kernel_neon_end(); + pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds); + put_unaligned_be32(4, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_encrypt(&walk, req, false); + do { + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; - while (walk.nbytes >= AES_BLOCK_SIZE) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + if (rk) + kernel_neon_begin(); - kernel_neon_begin(); pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, ctx->aes_key.key_enc, - num_rounds(&ctx->aes_key), ks); + iv, rk, nrounds, ks); kernel_neon_end(); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + walk.nbytes % (2 * AES_BLOCK_SIZE)); + + rk = ctx->aes_key.key_enc; + } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, - num_rounds(&ctx->aes_key)); + __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_encrypt(&walk, req, false); - while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; @@ -400,8 +424,7 @@ static int gcm_encrypt(struct aead_request *req) do { __aes_arm64_encrypt(ctx->aes_key.key_enc, - ks, iv, - num_rounds(&ctx->aes_key)); + ks, iv, nrounds); crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -418,19 +441,28 @@ static int gcm_encrypt(struct aead_request *req) } if (walk.nbytes) __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, - num_rounds(&ctx->aes_key)); + nrounds); } /* handle the tail */ if (walk.nbytes) { u8 buf[GHASH_BLOCK_SIZE]; + unsigned int nbytes = walk.nbytes; + u8 *dst = walk.dst.virt.addr; + u8 *head = NULL; crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks, walk.nbytes); - memcpy(buf, walk.dst.virt.addr, walk.nbytes); - memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes); - ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL); + if (walk.nbytes > GHASH_BLOCK_SIZE) { + head = dst; + dst += GHASH_BLOCK_SIZE; + nbytes %= GHASH_BLOCK_SIZE; + } + + memcpy(buf, dst, nbytes); + memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); + ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head); err = skcipher_walk_done(&walk, 0); } @@ -453,10 +485,11 @@ static int gcm_decrypt(struct aead_request *req) struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); unsigned int authsize = crypto_aead_authsize(aead); struct skcipher_walk walk; - u8 iv[AES_BLOCK_SIZE]; + u8 iv[2 * AES_BLOCK_SIZE]; u8 tag[AES_BLOCK_SIZE]; - u8 buf[GHASH_BLOCK_SIZE]; + u8 buf[2 * GHASH_BLOCK_SIZE]; u64 dg[2] = {}; + int nrounds = num_rounds(&ctx->aes_key); int err; if (req->assoclen) @@ -465,39 +498,53 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(1, iv + GCM_IV_SIZE); - if (likely(may_use_simd())) { - kernel_neon_begin(); + err = skcipher_walk_aead_decrypt(&walk, req, false); + + if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) { + u32 const *rk = NULL; - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, - num_rounds(&ctx->aes_key)); + kernel_neon_begin(); + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - kernel_neon_end(); - err = skcipher_walk_aead_decrypt(&walk, req, false); + do { + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + int rem = walk.total - blocks * AES_BLOCK_SIZE; - while (walk.nbytes >= AES_BLOCK_SIZE) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + if (rk) + kernel_neon_begin(); - kernel_neon_begin(); pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, ctx->aes_key.key_enc, - num_rounds(&ctx->aes_key)); + iv, rk, nrounds); + + /* check if this is the final iteration of the loop */ + if (rem < (2 * AES_BLOCK_SIZE)) { + u8 *iv2 = iv + AES_BLOCK_SIZE; + + if (rem > AES_BLOCK_SIZE) { + memcpy(iv2, iv, AES_BLOCK_SIZE); + crypto_inc(iv2, AES_BLOCK_SIZE); + } + + pmull_gcm_encrypt_block(iv, iv, NULL, nrounds); + + if (rem > AES_BLOCK_SIZE) + pmull_gcm_encrypt_block(iv2, iv2, NULL, + nrounds); + } + kernel_neon_end(); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } - if (walk.nbytes) - pmull_gcm_encrypt_block(iv, iv, NULL, - num_rounds(&ctx->aes_key)); + walk.nbytes % (2 * AES_BLOCK_SIZE)); + + rk = ctx->aes_key.key_enc; + } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, - num_rounds(&ctx->aes_key)); + __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_decrypt(&walk, req, false); - while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; @@ -508,8 +555,7 @@ static int gcm_decrypt(struct aead_request *req) do { __aes_arm64_encrypt(ctx->aes_key.key_enc, - buf, iv, - num_rounds(&ctx->aes_key)); + buf, iv, nrounds); crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -522,14 +568,24 @@ static int gcm_decrypt(struct aead_request *req) } if (walk.nbytes) __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv, - num_rounds(&ctx->aes_key)); + nrounds); } /* handle the tail */ if (walk.nbytes) { - memcpy(buf, walk.src.virt.addr, walk.nbytes); - memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes); - ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL); + const u8 *src = walk.src.virt.addr; + const u8 *head = NULL; + unsigned int nbytes = walk.nbytes; + + if (walk.nbytes > GHASH_BLOCK_SIZE) { + head = src; + src += GHASH_BLOCK_SIZE; + nbytes %= GHASH_BLOCK_SIZE; + } + + memcpy(buf, src, nbytes); + memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); + ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head); crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv, walk.nbytes); @@ -554,7 +610,7 @@ static int gcm_decrypt(struct aead_request *req) static struct aead_alg gcm_aes_alg = { .ivsize = GCM_IV_SIZE, - .chunksize = AES_BLOCK_SIZE, + .chunksize = 2 * AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, .setkey = gcm_setkey, .setauthsize = gcm_setauthsize, diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index efbeb3e0dcfb..17fac2889f56 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -99,7 +99,6 @@ static struct shash_alg alg = { .cra_name = "sha1", .cra_driver_name = "sha1-ce", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index fd1ff2b13dfa..261f5195cab7 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -114,7 +114,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha224", .cra_driver_name = "sha224-ce", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -129,7 +128,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha256", .cra_driver_name = "sha256-ce", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index e8880ccdc71f..4aedeaefd61f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -67,8 +67,7 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha256_state), .base.cra_name = "sha256", .base.cra_driver_name = "sha256-arm64", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_priority = 125, .base.cra_blocksize = SHA256_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { @@ -80,8 +79,7 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha256_state), .base.cra_name = "sha224", .base.cra_driver_name = "sha224-arm64", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_priority = 125, .base.cra_blocksize = SHA224_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; @@ -153,7 +151,6 @@ static struct shash_alg neon_algs[] = { { .base.cra_name = "sha256", .base.cra_driver_name = "sha256-arm64-neon", .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA256_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { @@ -166,7 +163,6 @@ static struct shash_alg neon_algs[] = { { .base.cra_name = "sha224", .base.cra_driver_name = "sha224-arm64-neon", .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA224_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index da8222e528bd..a336feac0f59 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -105,7 +105,6 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-224", .base.cra_driver_name = "sha3-224-ce", - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA3_224_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -117,7 +116,6 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-256", .base.cra_driver_name = "sha3-256-ce", - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA3_256_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -129,7 +127,6 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-384", .base.cra_driver_name = "sha3-384-ce", - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA3_384_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -141,7 +138,6 @@ static struct shash_alg algs[] = { { .descsize = sizeof(struct sha3_state), .base.cra_name = "sha3-512", .base.cra_driver_name = "sha3-512-ce", - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA3_512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index a77c8632a589..f2c5f28c622a 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -87,7 +87,6 @@ static struct shash_alg algs[] = { { .base.cra_name = "sha384", .base.cra_driver_name = "sha384-ce", .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { @@ -100,7 +99,6 @@ static struct shash_alg algs[] = { { .base.cra_name = "sha512", .base.cra_driver_name = "sha512-ce", .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 27db4851e380..325b23b43a9b 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -63,7 +63,6 @@ static struct shash_alg algs[] = { { .base.cra_name = "sha512", .base.cra_driver_name = "sha512-arm64", .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { @@ -76,7 +75,6 @@ static struct shash_alg algs[] = { { .base.cra_name = "sha384", .base.cra_driver_name = "sha384-arm64", .base.cra_priority = 150, - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SHA384_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index 3b4948f7e26f..88938a20d9b2 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -72,7 +72,6 @@ static struct shash_alg sm3_alg = { .descsize = sizeof(struct sm3_state), .base.cra_name = "sm3", .base.cra_driver_name = "sm3-ce", - .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, .base.cra_blocksize = SM3_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index c0235e0ff849..9bca54dda75c 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -40,17 +40,6 @@ #include <asm/cmpxchg.h> -#define ___atomic_add_unless(v, a, u, sfx) \ -({ \ - typeof((v)->counter) c, old; \ - \ - c = atomic##sfx##_read(v); \ - while (c != (u) && \ - (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c) \ - c = old; \ - c; \ - }) - #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) READ_ONCE((v)->counter) @@ -61,21 +50,11 @@ #define atomic_add_return_release atomic_add_return_release #define atomic_add_return atomic_add_return -#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) -#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) -#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) -#define atomic_inc_return(v) atomic_add_return(1, (v)) - #define atomic_sub_return_relaxed atomic_sub_return_relaxed #define atomic_sub_return_acquire atomic_sub_return_acquire #define atomic_sub_return_release atomic_sub_return_release #define atomic_sub_return atomic_sub_return -#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) -#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) -#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) -#define atomic_dec_return(v) atomic_sub_return(1, (v)) - #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed #define atomic_fetch_add_acquire atomic_fetch_add_acquire #define atomic_fetch_add_release atomic_fetch_add_release @@ -119,13 +98,6 @@ cmpxchg_release(&((v)->counter), (old), (new)) #define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) -#define atomic_inc(v) atomic_add(1, (v)) -#define atomic_dec(v) atomic_sub(1, (v)) -#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) -#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) -#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) -#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -#define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot /* @@ -140,21 +112,11 @@ #define atomic64_add_return_release atomic64_add_return_release #define atomic64_add_return atomic64_add_return -#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) -#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) -#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) - #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed #define atomic64_sub_return_acquire atomic64_sub_return_acquire #define atomic64_sub_return_release atomic64_sub_return_release #define atomic64_sub_return atomic64_sub_return -#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) -#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) -#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) - #define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed #define atomic64_fetch_add_acquire atomic64_fetch_add_acquire #define atomic64_fetch_add_release atomic64_fetch_add_release @@ -195,16 +157,9 @@ #define atomic64_cmpxchg_release atomic_cmpxchg_release #define atomic64_cmpxchg atomic_cmpxchg -#define atomic64_inc(v) atomic64_add(1, (v)) -#define atomic64_dec(v) atomic64_sub(1, (v)) -#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) -#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) -#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0) -#define atomic64_add_unless(v, a, u) (___atomic_add_unless(v, a, u, 64) != u) #define atomic64_andnot atomic64_andnot -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define atomic64_dec_if_positive atomic64_dec_if_positive #endif #endif diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h index 9c19594ce7cb..10d536b1af74 100644 --- a/arch/arm64/include/asm/bitops.h +++ b/arch/arm64/include/asm/bitops.h @@ -17,22 +17,11 @@ #define __ASM_BITOPS_H #include <linux/compiler.h> -#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif -/* - * Little endian assembly atomic bitops. - */ -extern void set_bit(int nr, volatile unsigned long *p); -extern void clear_bit(int nr, volatile unsigned long *p); -extern void change_bit(int nr, volatile unsigned long *p); -extern int test_and_set_bit(int nr, volatile unsigned long *p); -extern int test_and_clear_bit(int nr, volatile unsigned long *p); -extern int test_and_change_bit(int nr, volatile unsigned long *p); - #include <asm-generic/bitops/builtin-__ffs.h> #include <asm-generic/bitops/builtin-ffs.h> #include <asm-generic/bitops/builtin-__fls.h> @@ -44,15 +33,11 @@ extern int test_and_change_bit(int nr, volatile unsigned long *p); #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/hweight.h> -#include <asm-generic/bitops/lock.h> +#include <asm-generic/bitops/atomic.h> +#include <asm-generic/bitops/lock.h> #include <asm-generic/bitops/non-atomic.h> #include <asm-generic/bitops/le.h> - -/* - * Ext2 is defined to use little-endian byte ordering. - */ -#define ext2_set_bit_atomic(lock, nr, p) test_and_set_bit_le(nr, p) -#define ext2_clear_bit_atomic(lock, nr, p) test_and_clear_bit_le(nr, p) +#include <asm-generic/bitops/ext2-atomic-setbit.h> #endif /* __ASM_BITOPS_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 192d791f1103..7ed320895d1f 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -87,6 +87,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (true) +#define efi_table_attr(table, attr, instance) \ + ((table##_t *)instance)->attr + #define efi_call_proto(protocol, f, instance, ...) \ ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 41770766d964..6a53e59ced95 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -119,13 +119,16 @@ static inline void decode_ctrl_reg(u32 reg, struct task_struct; struct notifier_block; +struct perf_event_attr; struct perf_event; struct pmu; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, int *gen_len, int *gen_type, int *offset); -extern int arch_check_bp_in_kernelspace(struct perf_event *bp); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +extern int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index a0fee6985e6a..b2b0c6405eb0 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,8 +8,6 @@ struct pt_regs; -extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); - static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 6deb8d726041..d5a44cf859e9 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -48,7 +48,6 @@ struct kprobe_ctlblk { unsigned long saved_irqflag; struct prev_kprobe prev_kprobe; struct kprobe_step_ctx ss_ctx; - struct pt_regs jprobe_saved_regs; }; void arch_remove_kprobe(struct kprobe *); diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index fa8b3fe932e6..6495cc51246f 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -29,20 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * The raw_cpu_read() is racy if called with preemption enabled. - * This is not a bug: kernel_neon_busy is only set when - * preemption is disabled, so we cannot migrate to another CPU - * while it is set, nor can we migrate to a CPU where it is set. - * So, if we find it clear on some CPU then we're guaranteed to - * find it clear on any CPU we could migrate to. - * - * If we are in between kernel_neon_begin()...kernel_neon_end(), - * the flag will be set, but preemption is also disabled, so we - * can't migrate to another CPU and spuriously see it become - * false. + * kernel_neon_busy is only set while preemption is disabled, + * and is clear whenever preemption is enabled. Since + * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * cannot change under our feet -- if it's set we cannot be + * migrated, and if it's clear we cannot be migrated to a CPU + * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !raw_cpu_read(kernel_neon_busy); + !this_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index ffdaea7954bb..0ad1cf233470 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,7 +37,7 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); /* * The ASID allocator will either invalidate the ASID or mark diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4b3dc9fc9c68..611e8921c3d4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1351,9 +1351,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void update_cpu_capabilities(u16 scope_mask) { - __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); __update_cpu_capabilities(arm64_errata, scope_mask, "enabling workaround for"); + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); } static int __enable_cpu_capability(void *arg) @@ -1408,8 +1408,8 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(u16 scope_mask) { - __enable_cpu_capabilities(arm64_features, scope_mask); __enable_cpu_capabilities(arm64_errata, scope_mask); + __enable_cpu_capabilities(arm64_features, scope_mask); } /* diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 413dbe530da8..8c9644376326 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -343,14 +343,13 @@ static int get_hbp_len(u8 hbp_len) /* * Check whether bp virtual address is in kernel space. */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) { unsigned int len; unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - va = info->address; - len = get_hbp_len(info->ctrl.len); + va = hw->address; + len = get_hbp_len(hw->ctrl.len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -421,53 +420,53 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, /* * Construct an arch_hw_breakpoint from a perf_event. */ -static int arch_build_bp_info(struct perf_event *bp) +static int arch_build_bp_info(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* Type */ - switch (bp->attr.bp_type) { + switch (attr->bp_type) { case HW_BREAKPOINT_X: - info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + hw->ctrl.type = ARM_BREAKPOINT_EXECUTE; break; case HW_BREAKPOINT_R: - info->ctrl.type = ARM_BREAKPOINT_LOAD; + hw->ctrl.type = ARM_BREAKPOINT_LOAD; break; case HW_BREAKPOINT_W: - info->ctrl.type = ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_STORE; break; case HW_BREAKPOINT_RW: - info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; break; default: return -EINVAL; } /* Len */ - switch (bp->attr.bp_len) { + switch (attr->bp_len) { case HW_BREAKPOINT_LEN_1: - info->ctrl.len = ARM_BREAKPOINT_LEN_1; + hw->ctrl.len = ARM_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - info->ctrl.len = ARM_BREAKPOINT_LEN_2; + hw->ctrl.len = ARM_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_3: - info->ctrl.len = ARM_BREAKPOINT_LEN_3; + hw->ctrl.len = ARM_BREAKPOINT_LEN_3; break; case HW_BREAKPOINT_LEN_4: - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; break; case HW_BREAKPOINT_LEN_5: - info->ctrl.len = ARM_BREAKPOINT_LEN_5; + hw->ctrl.len = ARM_BREAKPOINT_LEN_5; break; case HW_BREAKPOINT_LEN_6: - info->ctrl.len = ARM_BREAKPOINT_LEN_6; + hw->ctrl.len = ARM_BREAKPOINT_LEN_6; break; case HW_BREAKPOINT_LEN_7: - info->ctrl.len = ARM_BREAKPOINT_LEN_7; + hw->ctrl.len = ARM_BREAKPOINT_LEN_7; break; case HW_BREAKPOINT_LEN_8: - info->ctrl.len = ARM_BREAKPOINT_LEN_8; + hw->ctrl.len = ARM_BREAKPOINT_LEN_8; break; default: return -EINVAL; @@ -478,37 +477,37 @@ static int arch_build_bp_info(struct perf_event *bp) * AArch32 also requires breakpoints of length 2 for Thumb. * Watchpoints can be of length 1, 2, 4 or 8 bytes. */ - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) { if (is_compat_bp(bp)) { - if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && - info->ctrl.len != ARM_BREAKPOINT_LEN_4) + if (hw->ctrl.len != ARM_BREAKPOINT_LEN_2 && + hw->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; - } else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { + } else if (hw->ctrl.len != ARM_BREAKPOINT_LEN_4) { /* * FIXME: Some tools (I'm looking at you perf) assume * that breakpoints should be sizeof(long). This * is nonsense. For now, we fix up the parameter * but we should probably return -EINVAL instead. */ - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; } } /* Address */ - info->address = bp->attr.bp_addr; + hw->address = attr->bp_addr; /* * Privilege * Note that we disallow combined EL0/EL1 breakpoints because * that would complicate the stepping code. */ - if (arch_check_bp_in_kernelspace(bp)) - info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; + if (arch_check_bp_in_kernelspace(hw)) + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL1; else - info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL0; /* Enabled? */ - info->ctrl.enabled = !bp->attr.disabled; + hw->ctrl.enabled = !attr->disabled; return 0; } @@ -516,14 +515,15 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings. */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) +int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret; u64 alignment_mask, offset; /* Build the arch_hw_breakpoint. */ - ret = arch_build_bp_info(bp); + ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; @@ -537,42 +537,42 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * that here. */ if (is_compat_bp(bp)) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else alignment_mask = 0x3; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; switch (offset) { case 0: /* Aligned */ break; case 1: /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; case 2: /* Allow halfword watchpoints and breakpoints. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; default: return -EINVAL; } } else { - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; } - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + hw->ctrl.len <<= offset; /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. */ - if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) + if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) return -EINVAL; return 0; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 60e5fc661f74..780a12f59a8f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -42,16 +42,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -void (*handle_arch_irq)(struct pt_regs *) = NULL; - -void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) -{ - if (handle_arch_irq) - return; - - handle_arch_irq = handle_irq; -} - #ifdef CONFIG_VMAP_STACK static void init_irq_stacks(void) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d849d9804011..e78c3ef04d95 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -275,7 +275,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); break; @@ -395,9 +395,9 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) /* * If we have no pre-handler or it returned 0, we * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry, - * so get out doing nothing more here. + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. * * pre_handler can hit a breakpoint and can step thru * before return, keep PSTATE D-flag enabled until @@ -405,16 +405,8 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); - return; - } - } - } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == - BRK64_OPCODE_KPROBES) && cur_kprobe) { - /* We probably hit a jprobe. Call its break handler. */ - if (cur_kprobe->break_handler && - cur_kprobe->break_handler(cur_kprobe, regs)) { - setup_singlestep(cur_kprobe, regs, kcb, 0); - return; + } else + reset_current_kprobe(); } } /* @@ -465,74 +457,6 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - /* - * Since we can't be sure where in the stack frame "stacked" - * pass-by-value arguments are stored we just don't try to - * duplicate any of the stack. Do not use jprobes on functions that - * use more than 64 bytes (after padding each to an 8 byte boundary) - * of arguments, or pass individual arguments larger than 16 bytes. - */ - - instruction_pointer_set(regs, (unsigned long) jp->entry); - preempt_disable(); - pause_graph_tracing(); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - /* - * Jprobe handler return by entering break exception, - * encoded same as kprobe, but with following conditions - * -a special PC to identify it from the other kprobes. - * -restore stack addr to original saved pt_regs - */ - asm volatile(" mov sp, %0 \n" - "jprobe_return_break: brk %1 \n" - : - : "r" (kcb->jprobe_saved_regs.sp), - "I" (BRK64_ESR_KPROBES) - : "memory"); - - unreachable(); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long stack_addr = kcb->jprobe_saved_regs.sp; - long orig_sp = kernel_stack_pointer(regs); - struct jprobe *jp = container_of(p, struct jprobe, kp); - extern const char jprobe_return_break[]; - - if (instruction_pointer(regs) != (u64) jprobe_return_break) - return 0; - - if (orig_sp != stack_addr) { - struct pt_regs *saved_regs = - (struct pt_regs *)kcb->jprobe_saved_regs.sp; - pr_err("current sp %lx does not match saved sp %lx\n", - orig_sp, stack_addr); - pr_err("Saved registers for jprobe %p\n", jp); - __show_regs(saved_regs); - pr_err("Current registers\n"); - __show_regs(regs); - BUG(); - } - unpause_graph_tracing(); - *regs = kcb->jprobe_saved_regs; - preempt_enable_no_resched(); - return 1; -} - bool arch_within_kprobe_blacklist(unsigned long addr) { if ((addr >= (unsigned long)__kprobes_text_start && diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S index b82c85e5d972..e20483b104d9 100644 --- a/arch/arm64/kernel/vdso/note.S +++ b/arch/arm64/kernel/vdso/note.S @@ -22,7 +22,10 @@ #include <linux/uts.h> #include <linux/version.h> #include <linux/elfnote.h> +#include <linux/build-salt.h> ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END + +BUILD_SALT diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 137710f4dac3..68755fd70dcf 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ +lib-y := clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S deleted file mode 100644 index 43ac736baa5b..000000000000 --- a/arch/arm64/lib/bitops.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Based on arch/arm/lib/bitops.h - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/lse.h> - -/* - * x0: bits 5:0 bit offset - * bits 31:6 word offset - * x1: address - */ - .macro bitop, name, llsc, lse -ENTRY( \name ) - and w3, w0, #63 // Get bit offset - eor w0, w0, w3 // Clear low bits - mov x2, #1 - add x1, x1, x0, lsr #3 // Get word offset -alt_lse " prfm pstl1strm, [x1]", "nop" - lsl x3, x2, x3 // Create mask - -alt_lse "1: ldxr x2, [x1]", "\lse x3, [x1]" -alt_lse " \llsc x2, x2, x3", "nop" -alt_lse " stxr w0, x2, [x1]", "nop" -alt_lse " cbnz w0, 1b", "nop" - - ret -ENDPROC(\name ) - .endm - - .macro testop, name, llsc, lse -ENTRY( \name ) - and w3, w0, #63 // Get bit offset - eor w0, w0, w3 // Clear low bits - mov x2, #1 - add x1, x1, x0, lsr #3 // Get word offset -alt_lse " prfm pstl1strm, [x1]", "nop" - lsl x4, x2, x3 // Create mask - -alt_lse "1: ldxr x2, [x1]", "\lse x4, x2, [x1]" - lsr x0, x2, x3 -alt_lse " \llsc x2, x2, x4", "nop" -alt_lse " stlxr w5, x2, [x1]", "nop" -alt_lse " cbnz w5, 1b", "nop" -alt_lse " dmb ish", "nop" - - and x0, x0, #1 - ret -ENDPROC(\name ) - .endm - -/* - * Atomic bit operations. - */ - bitop change_bit, eor, steor - bitop clear_bit, bic, stclr - bitop set_bit, orr, stset - - testop test_and_change_bit, eor, ldeoral - testop test_and_clear_bit, bic, ldclral - testop test_and_set_bit, orr, ldsetal diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ea591c9e5144..9943690a3924 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -727,12 +727,7 @@ static const struct fault_info fault_info[] = { int handle_guest_sea(phys_addr_t addr, unsigned int esr) { - int ret = -ENOENT; - - if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) - ret = ghes_notify_sea(); - - return ret; + return ghes_notify_sea(); } asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index ecc6818191df..192b3ba07075 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -108,7 +108,6 @@ static pte_t get_clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma = { .vm_mm = mm }; pte_t orig_pte = huge_ptep_get(ptep); bool valid = pte_valid(orig_pte); unsigned long i, saddr = addr; @@ -125,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm, orig_pte = pte_mkdirty(orig_pte); } - if (valid) + if (valid) { + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); flush_tlb_range(&vma, saddr, addr); + } return orig_pte; } @@ -145,7 +146,7 @@ static void clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma = { .vm_mm = mm }; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); unsigned long i, saddr = addr; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 811f9f8b3bb0..787e27964ab9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -615,11 +615,13 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Make sure we chose the upper bound of sizeof(struct page) - * correctly. + * correctly when sizing the VMEMMAP array. */ BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT)); +#endif if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; |