summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig43
-rw-r--r--arch/arm64/Kconfig.debug21
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx.dtsi18
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts16
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi2
-rw-r--r--arch/arm64/boot/dts/exynos/exynos5433.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173.dtsi6
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts2
-rw-r--r--arch/arm64/boot/dts/xilinx/zynqmp.dtsi6
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/Kconfig24
-rw-r--r--arch/arm64/crypto/Makefile13
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c1
-rw-r--r--arch/arm64/crypto/aes-cipher-core.S110
-rw-r--r--arch/arm64/crypto/aes-cipher-glue.c69
-rw-r--r--arch/arm64/crypto/aes-glue.c281
-rw-r--r--arch/arm64/crypto/aes-modes.S119
-rw-r--r--arch/arm64/crypto/aes-neon.S235
-rw-r--r--arch/arm64/crypto/aes-neonbs-core.S972
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c439
-rw-r--r--arch/arm64/crypto/chacha20-neon-core.S450
-rw-r--r--arch/arm64/crypto/chacha20-neon-glue.c126
-rw-r--r--arch/arm64/crypto/crc32-arm64.c290
-rw-r--r--arch/arm64/crypto/crc32-ce-glue.c49
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/arch_timer.h38
-rw-r--r--arch/arm64/include/asm/assembler.h59
-rw-r--r--arch/arm64/include/asm/cpucaps.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h26
-rw-r--r--arch/arm64/include/asm/cputype.h7
-rw-r--r--arch/arm64/include/asm/efi.h1
-rw-r--r--arch/arm64/include/asm/insn.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h10
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h10
-rw-r--r--arch/arm64/include/asm/lse.h2
-rw-r--r--arch/arm64/include/asm/memory.h68
-rw-r--r--arch/arm64/include/asm/mmu_context.h14
-rw-r--r--arch/arm64/include/asm/pgtable.h17
-rw-r--r--arch/arm64/include/asm/processor.h1
-rw-r--r--arch/arm64/include/asm/ptrace.h20
-rw-r--r--arch/arm64/include/asm/sysreg.h26
-rw-r--r--arch/arm64/include/asm/tlbflush.h18
-rw-r--r--arch/arm64/include/asm/uaccess.h4
-rw-r--r--arch/arm64/include/asm/virt.h9
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h2
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h13
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c3
-rw-r--r--arch/arm64/kernel/arm64ksyms.c4
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c7
-rw-r--r--arch/arm64/kernel/cacheinfo.c13
-rw-r--r--arch/arm64/kernel/cpu-reset.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c33
-rw-r--r--arch/arm64/kernel/cpufeature.c426
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/efi-entry.S9
-rw-r--r--arch/arm64/kernel/entry-ftrace.S13
-rw-r--r--arch/arm64/kernel/entry.S2
-rw-r--r--arch/arm64/kernel/head.S73
-rw-r--r--arch/arm64/kernel/hibernate.c26
-rw-r--r--arch/arm64/kernel/insn.c33
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c18
-rw-r--r--arch/arm64/kernel/process.c6
-rw-r--r--arch/arm64/kernel/psci.c3
-rw-r--r--arch/arm64/kernel/ptrace.c16
-rw-r--r--arch/arm64/kernel/setup.c13
-rw-r--r--arch/arm64/kernel/smccc-call.S23
-rw-r--r--arch/arm64/kernel/smp.c4
-rw-r--r--arch/arm64/kernel/smp_spin_table.c3
-rw-r--r--arch/arm64/kernel/topology.c10
-rw-r--r--arch/arm64/kernel/traps.c41
-rw-r--r--arch/arm64/kernel/vdso.c8
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/debug.c6
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c66
-rw-r--r--arch/arm64/kvm/hyp/switch.c17
-rw-r--r--arch/arm64/kvm/hyp/tlb.c13
-rw-r--r--arch/arm64/kvm/reset.c9
-rw-r--r--arch/arm64/kvm/sys_regs.c92
-rw-r--r--arch/arm64/kvm/sys_regs.h4
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c346
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/context.c11
-rw-r--r--arch/arm64/mm/dma-mapping.c45
-rw-r--r--arch/arm64/mm/fault.c14
-rw-r--r--arch/arm64/mm/hugetlbpage.c2
-rw-r--r--arch/arm64/mm/init.c14
-rw-r--r--arch/arm64/mm/ioremap.c2
-rw-r--r--arch/arm64/mm/kasan_init.c22
-rw-r--r--arch/arm64/mm/mmu.c35
-rw-r--r--arch/arm64/mm/physaddr.c30
-rw-r--r--arch/arm64/mm/proc.S1
-rw-r--r--arch/arm64/net/bpf_jit_comp.c22
-rw-r--r--arch/arm64/xen/hypercall.S1
97 files changed, 4226 insertions, 980 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 111742126897..a39029b5414e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -6,13 +6,17 @@ config ARM64
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
select ARCH_CLOCKSOURCE_DATA
+ select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
@@ -96,7 +100,7 @@ config ARM64
select HAVE_RCU_TABLE_FREE
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
- select HAVE_KRETPROBES if HAVE_KPROBES
+ select HAVE_KRETPROBES
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
@@ -123,9 +127,6 @@ config ARCH_PHYS_ADDR_T_64BIT
config MMU
def_bool y
-config DEBUG_RODATA
- def_bool y
-
config ARM64_PAGE_SHIFT
int
default 16 if ARM64_64K_PAGES
@@ -479,6 +480,34 @@ config CAVIUM_ERRATUM_27456
If unsure, say Y.
+config QCOM_FALKOR_ERRATUM_1003
+ bool "Falkor E1003: Incorrect translation due to ASID change"
+ default y
+ select ARM64_PAN if ARM64_SW_TTBR0_PAN
+ help
+ On Falkor v1, an incorrect ASID may be cached in the TLB when ASID
+ and BADDR are changed together in TTBRx_EL1. The workaround for this
+ issue is to use a reserved ASID in cpu_do_switch_mm() before
+ switching to the new ASID. Saying Y here selects ARM64_PAN if
+ ARM64_SW_TTBR0_PAN is selected. This is done because implementing and
+ maintaining the E1003 workaround in the software PAN emulation code
+ would be an unnecessary complication. The affected Falkor v1 CPU
+ implements ARMv8.1 hardware PAN support and using hardware PAN
+ support versus software PAN emulation is mutually exclusive at
+ runtime.
+
+ If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_1009
+ bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
+ default y
+ help
+ On Falkor v1, the CPU may prematurely complete a DSB following a
+ TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation
+ one more time to fix the issue.
+
+ If unsure, say Y.
+
endmenu
@@ -614,6 +643,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
depends on NUMA
+config HOLES_IN_ZONE
+ def_bool y
+ depends on NUMA
+
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
@@ -1010,7 +1043,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
depends on ARM64_4K_PAGES || EXPERT
- select COMPAT_BINFMT_ELF
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
select COMPAT_OLD_SIGACTION
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index d1ebd46872fd..fca2f02cde68 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -71,19 +71,8 @@ config DEBUG_WX
If in doubt, say "Y".
-config DEBUG_SET_MODULE_RONX
- bool "Set loadable kernel module data as NX and text as RO"
- depends on MODULES
- default y
- help
- Is this is set, kernel module text and rodata will be made read-only.
- This is to help catch accidental or malicious attempts to change the
- kernel's executable code.
-
- If in doubt, say Y.
-
config DEBUG_ALIGN_RODATA
- depends on DEBUG_RODATA
+ depends on STRICT_KERNEL_RWX
bool "Align linker sections up to SECTION_SIZE"
help
If this option is enabled, sections that may potentially be marked as
@@ -95,6 +84,14 @@ config DEBUG_ALIGN_RODATA
If in doubt, say N.
+config DEBUG_EFI
+ depends on EFI && DEBUG_INFO
+ bool "UEFI debugging"
+ help
+ Enable this option to include EFI specific debugging features into
+ the kernel that are only useful when using a debug build of the
+ UEFI firmware
+
source "drivers/hwtracing/coresight/Kconfig"
endmenu
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index eada0b58ba1c..0cbe24b49710 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -55,6 +55,24 @@
#address-cells = <2>;
#size-cells = <2>;
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ /* 16 MiB reserved for Hardware ROM Firmware */
+ hwrom_reserved: hwrom@0 {
+ reg = <0x0 0x0 0x0 0x1000000>;
+ no-map;
+ };
+
+ /* 2 MiB reserved for ARM Trusted Firmware (BL31) */
+ secmon_reserved: secmon@10000000 {
+ reg = <0x0 0x10000000 0x0 0x200000>;
+ no-map;
+ };
+ };
+
cpus {
#address-cells = <0x2>;
#size-cells = <0x0>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
index 238fbeacd330..c59403adb387 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
@@ -137,6 +137,10 @@
};
};
+&scpi_clocks {
+ status = "disabled";
+};
+
&uart_AO {
status = "okay";
pinctrl-0 = <&uart_ao_a_pins>;
@@ -147,6 +151,18 @@
status = "okay";
pinctrl-0 = <&eth_rgmii_pins>;
pinctrl-names = "default";
+ phy-handle = <&eth_phy0>;
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eth_phy0: ethernet-phy@0 {
+ reg = <0>;
+ eee-broken-1000t;
+ };
+ };
};
&ir {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index 596240c38a9c..b35307321b63 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -55,7 +55,7 @@
mboxes = <&mailbox 1 &mailbox 2>;
shmem = <&cpu_scp_lpri &cpu_scp_hpri>;
- clocks {
+ scpi_clocks: clocks {
compatible = "arm,scpi-clocks";
scpi_dvfs: scpi_clocks@0 {
diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
index 64226d5ae471..135890cd8a85 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
@@ -1367,7 +1367,7 @@
};
amba {
- compatible = "arm,amba-bus";
+ compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges;
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 12e702771f5c..40a02b29213e 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -728,9 +728,11 @@
<&phy_port1 PHY_TYPE_USB2>;
power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>;
clocks = <&topckgen CLK_TOP_USB30_SEL>,
+ <&clk26m>,
<&pericfg CLK_PERI_USB0>,
<&pericfg CLK_PERI_USB1>;
clock-names = "sys_ck",
+ "ref_ck",
"wakeup_deb_p0",
"wakeup_deb_p1";
mediatek,syscon-wakeup = <&pericfg>;
@@ -745,8 +747,8 @@
reg-names = "mac";
interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_LOW>;
power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>;
- clocks = <&topckgen CLK_TOP_USB30_SEL>;
- clock-names = "sys_ck";
+ clocks = <&topckgen CLK_TOP_USB30_SEL>, <&clk26m>;
+ clock-names = "sys_ck", "ref_ck";
status = "disabled";
};
};
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts
index 358089687a69..ef1b9e573af0 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts
+++ b/arch/arm64/boot/dts/xilinx/zynqmp-ep108.dts
@@ -27,7 +27,7 @@
stdout-path = "serial0:115200n8";
};
- memory {
+ memory@0 {
device_type = "memory";
reg = <0x0 0x0 0x0 0x40000000>;
};
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index 68a908334c7b..54dc28351c8c 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -72,7 +72,7 @@
<1 10 0xf08>;
};
- amba_apu {
+ amba_apu: amba_apu@0 {
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <1>;
@@ -175,7 +175,7 @@
};
i2c0: i2c@ff020000 {
- compatible = "cdns,i2c-r1p10";
+ compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10";
status = "disabled";
interrupt-parent = <&gic>;
interrupts = <0 17 4>;
@@ -185,7 +185,7 @@
};
i2c1: i2c@ff030000 {
- compatible = "cdns,i2c-r1p10";
+ compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10";
status = "disabled";
interrupt-parent = <&gic>;
interrupts = <0 18 4>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 4c1a3455cb7b..8752f578417c 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -519,4 +519,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
-CONFIG_CRYPTO_CRC32_ARM64=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 450a85df041a..d92293747d63 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -37,10 +37,14 @@ config CRYPTO_CRCT10DIF_ARM64_CE
select CRYPTO_HASH
config CRYPTO_CRC32_ARM64_CE
- tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC32
+ tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
+ depends on CRC32
select CRYPTO_HASH
+config CRYPTO_AES_ARM64
+ tristate "AES core cipher using scalar instructions"
+ select CRYPTO_AES
+
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
@@ -67,9 +71,17 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES
select CRYPTO_SIMD
-config CRYPTO_CRC32_ARM64
- tristate "CRC32 and CRC32C using optional ARMv8 instructions"
- depends on ARM64
- select CRYPTO_HASH
+config CRYPTO_CHACHA20_NEON
+ tristate "NEON accelerated ChaCha20 symmetric cipher"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_CHACHA20
+
+config CRYPTO_AES_ARM64_BS
+ tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES_ARM64_NEON_BLK
+ select CRYPTO_SIMD
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index aa8888d7b744..b5edc5918c28 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -41,15 +41,20 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
+aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
+aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
+
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
-obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
-
-CFLAGS_crc32-arm64.o := -mcpu=generic+crc
-
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index cc5515dac74a..6a7dbc7c83a6 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = {
.cra_priority = 300,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.ivsize = AES_BLOCK_SIZE,
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
new file mode 100644
index 000000000000..f2f9cc519309
--- /dev/null
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -0,0 +1,110 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ rk .req x0
+ out .req x1
+ in .req x2
+ rounds .req x3
+ tt .req x4
+ lt .req x2
+
+ .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift
+ ubfx \reg0, \in0, #\shift, #8
+ .if \enc
+ ubfx \reg1, \in1e, #\shift, #8
+ .else
+ ubfx \reg1, \in1d, #\shift, #8
+ .endif
+ ldr \reg0, [tt, \reg0, uxtw #2]
+ ldr \reg1, [tt, \reg1, uxtw #2]
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+ ldp \out0, \out1, [rk], #8
+
+ __pair \enc, w13, w14, \in0, \in1, \in3, 0
+ __pair \enc, w15, w16, \in1, \in2, \in0, 8
+ __pair \enc, w17, w18, \in2, \in3, \in1, 16
+ __pair \enc, \t0, \t1, \in3, \in0, \in2, 24
+
+ eor \out0, \out0, w13
+ eor \out1, \out1, w14
+ eor \out0, \out0, w15, ror #24
+ eor \out1, \out1, w16, ror #24
+ eor \out0, \out0, w17, ror #16
+ eor \out1, \out1, w18, ror #16
+ eor \out0, \out0, \t0, ror #8
+ eor \out1, \out1, \t1, ror #8
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .endm
+
+ .macro do_crypt, round, ttab, ltab
+ ldp w5, w6, [in]
+ ldp w7, w8, [in, #8]
+ ldp w9, w10, [rk], #16
+ ldp w11, w12, [rk, #-8]
+
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+
+ eor w5, w5, w9
+ eor w6, w6, w10
+ eor w7, w7, w11
+ eor w8, w8, w12
+
+ adr_l tt, \ttab
+ adr_l lt, \ltab
+
+ tbnz rounds, #1, 1f
+
+0: \round w9, w10, w11, w12, w5, w6, w7, w8
+ \round w5, w6, w7, w8, w9, w10, w11, w12
+
+1: subs rounds, rounds, #4
+ \round w9, w10, w11, w12, w5, w6, w7, w8
+ csel tt, tt, lt, hi
+ \round w5, w6, w7, w8, w9, w10, w11, w12
+ b.hi 0b
+
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+
+ stp w5, w6, [out]
+ stp w7, w8, [out, #8]
+ ret
+ .endm
+
+ .align 5
+ENTRY(__aes_arm64_encrypt)
+ do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ENDPROC(__aes_arm64_encrypt)
+
+ .align 5
+ENTRY(__aes_arm64_decrypt)
+ do_crypt iround, crypto_it_tab, crypto_il_tab
+ENDPROC(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c
new file mode 100644
index 000000000000..7288e7cbebff
--- /dev/null
+++ b/arch/arm64/crypto/aes-cipher-glue.c
@@ -0,0 +1,69 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+EXPORT_SYMBOL(__aes_arm64_encrypt);
+
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+EXPORT_SYMBOL(__aes_arm64_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm64_decrypt(ctx->key_dec, out, in, rounds);
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-arm64",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+
+ .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cra_cipher.cia_setkey = crypto_aes_set_key,
+ .cra_cipher.cia_encrypt = aes_encrypt,
+ .cra_cipher.cia_decrypt = aes_decrypt
+};
+
+static int __init aes_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for arm64");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 4e3f8adb1793..bcf596b0197e 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,7 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
+#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
@@ -31,6 +32,7 @@
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
+#define aes_mac_update ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
@@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
+#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("cmac(aes)");
+MODULE_ALIAS_CRYPTO("xcbc(aes)");
+MODULE_ALIAS_CRYPTO("cbcmac(aes)");
#endif
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 const rk2[], u8 iv[],
int first);
+asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ int blocks, u8 dg[], int enc_before,
+ int enc_after);
+
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
};
+struct mac_tfm_ctx {
+ struct crypto_aes_ctx key;
+ u8 __aligned(8) consts[];
+};
+
+struct mac_desc_ctx {
+ unsigned int len;
+ u8 dg[AES_BLOCK_SIZE];
+};
+
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -215,14 +235,15 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *tsrc = walk.src.virt.addr;
/*
- * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
- * to tell aes_ctr_encrypt() to only read half a block.
+ * Tell aes_ctr_encrypt() to process a tail block.
*/
- blocks = (nbytes <= 8) ? -1 : 1;
+ blocks = -1;
- aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
blocks, walk.iv, first);
- memcpy(tdst, tail, nbytes);
+ if (tdst != tsrc)
+ memcpy(tdst, tsrc, nbytes);
+ crypto_xor(tdst, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
@@ -282,7 +303,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -298,7 +318,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -315,7 +334,22 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .setkey = skcipher_aes_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+}, {
+ .base = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO - 1,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -333,7 +367,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,
@@ -344,15 +377,228 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = xts_decrypt,
} };
+static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ int err;
+
+ err = aes_expandkey(&ctx->key, in_key, key_len);
+ if (err)
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+ return err;
+}
+
+static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
+{
+ u64 a = be64_to_cpu(x->a);
+ u64 b = be64_to_cpu(x->b);
+
+ y->a = cpu_to_be64((a << 1) | (b >> 63));
+ y->b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+}
+
+static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *consts = (be128 *)ctx->consts;
+ u8 *rk = (u8 *)ctx->key.key_enc;
+ int rounds = 6 + key_len / 4;
+ int err;
+
+ err = cbcmac_setkey(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ /* encrypt the zero vector */
+ kernel_neon_begin();
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
+ kernel_neon_end();
+
+ cmac_gf128_mul_by_x(consts, consts);
+ cmac_gf128_mul_by_x(consts + 1, consts);
+
+ return 0;
+}
+
+static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ static u8 const ks[3][AES_BLOCK_SIZE] = {
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
+ };
+
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ u8 *rk = (u8 *)ctx->key.key_enc;
+ int rounds = 6 + key_len / 4;
+ u8 key[AES_BLOCK_SIZE];
+ int err;
+
+ err = cbcmac_setkey(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
+ kernel_neon_end();
+
+ return cbcmac_setkey(tfm, key, sizeof(key));
+}
+
+static int mac_init(struct shash_desc *desc)
+{
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ memset(ctx->dg, 0, AES_BLOCK_SIZE);
+ ctx->len = 0;
+
+ return 0;
+}
+
+static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+
+ while (len > 0) {
+ unsigned int l;
+
+ if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
+ (ctx->len + len) > AES_BLOCK_SIZE) {
+
+ int blocks = len / AES_BLOCK_SIZE;
+
+ len %= AES_BLOCK_SIZE;
+
+ kernel_neon_begin();
+ aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
+ ctx->dg, (ctx->len != 0), (len != 0));
+ kernel_neon_end();
+
+ p += blocks * AES_BLOCK_SIZE;
+
+ if (!len) {
+ ctx->len = AES_BLOCK_SIZE;
+ break;
+ }
+ ctx->len = 0;
+ }
+
+ l = min(len, AES_BLOCK_SIZE - ctx->len);
+
+ if (l <= AES_BLOCK_SIZE) {
+ crypto_xor(ctx->dg + ctx->len, p, l);
+ ctx->len += l;
+ len -= l;
+ p += l;
+ }
+ }
+
+ return 0;
+}
+
+static int cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+
+ kernel_neon_begin();
+ aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
+ kernel_neon_end();
+
+ memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int cmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+ u8 *consts = tctx->consts;
+
+ if (ctx->len != AES_BLOCK_SIZE) {
+ ctx->dg[ctx->len] ^= 0x80;
+ consts += AES_BLOCK_SIZE;
+ }
+
+ kernel_neon_begin();
+ aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
+ kernel_neon_end();
+
+ memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg mac_algs[] = { {
+ .base.cra_name = "cmac(aes)",
+ .base.cra_driver_name = "cmac-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
+ 2 * AES_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cmac_final,
+ .setkey = cmac_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+}, {
+ .base.cra_name = "xcbc(aes)",
+ .base.cra_driver_name = "xcbc-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
+ 2 * AES_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cmac_final,
+ .setkey = xcbc_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+}, {
+ .base.cra_name = "cbcmac(aes)",
+ .base.cra_driver_name = "cbcmac-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cbcmac_final,
+ .setkey = cbcmac_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+} };
+
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
static void aes_exit(void)
{
int i;
- for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
- simd_skcipher_free(aes_simd_algs[i]);
+ for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
+ if (aes_simd_algs[i])
+ simd_skcipher_free(aes_simd_algs[i]);
+ crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
@@ -369,7 +615,14 @@ static int __init aes_init(void)
if (err)
return err;
+ err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
+ if (err)
+ goto unregister_ciphers;
+
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+ if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
+ continue;
+
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
@@ -385,6 +638,8 @@ static int __init aes_init(void)
unregister_simds:
aes_exit();
+unregister_ciphers:
+ crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
return err;
}
@@ -392,5 +647,7 @@ unregister_simds:
module_cpu_feature_match(AES, aes_init);
#else
module_init(aes_init);
+EXPORT_SYMBOL(neon_aes_ecb_encrypt);
+EXPORT_SYMBOL(neon_aes_cbc_encrypt);
#endif
module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index c53dbeae79f2..2674d43d1384 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt)
cbz w6, .Lcbcencloop
ld1 {v0.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x5
+ enc_prepare w3, x2, x6
.Lcbcencloop:
ld1 {v1.16b}, [x1], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x5, w6
+ encrypt_block v0, w3, x2, x6, w7
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
+ st1 {v0.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt)
cbz w6, .LcbcdecloopNx
ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x5
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
#if INTERLEAVE >= 2
@@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt)
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x5, w6
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
@@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt)
bne .Lcbcdecloop
.Lcbcdecout:
FRAME_POP
+ st1 {v7.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt)
AES_ENTRY(aes_ctr_encrypt)
FRAME_PUSH
- cbnz w6, .Lctrfirst /* 1st time around? */
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
-#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
- bcs .Lctrinc
- add x5, x5, #1 /* increment BE ctr */
- b .LctrincNx
-#else
- b .Lctrinc
-#endif
-.Lctrfirst:
+ cbz w6, .Lctrnotfirst /* 1st time around? */
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
+
+.Lctrnotfirst:
+ umov x8, v4.d[1] /* keep swabbed ctr in reg */
+ rev x8, x8
#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
+ cmn w8, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #INTERLEAVE
@@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt)
#if INTERLEAVE == 2
mov v0.8b, v4.8b
mov v1.8b, v4.8b
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v0.d[1], x7
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v1.d[1], x7
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
do_encrypt_block2x
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt)
st1 {v0.16b-v1.16b}, [x0], #32
#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w5
+ dup v7.4s, w8
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
@@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt)
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
- add x5, x5, #INTERLEAVE
+ add x8, x8, #INTERLEAVE
#endif
- cbz w4, .LctroutNx
-.LctrincNx:
- rev x7, x5
+ rev x7, x8
ins v4.d[1], x7
+ cbz w4, .Lctrout
b .LctrloopNx
-.LctroutNx:
- sub x5, x5, #1
- rev x7, x5
- ins v4.d[1], x7
- b .Lctrout
.Lctr1x:
adds w4, w4, #INTERLEAVE
beq .Lctrout
@@ -342,30 +329,37 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
+
+ adds x8, x8, #1 /* increment BE ctr */
+ rev x7, x8
+ ins v4.d[1], x7
+ bcs .Lctrcarry /* overflow? */
+
+.Lctrcarrydone:
subs w4, w4, #1
- bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
- beq .Lctrout
-.Lctrinc:
- adds x5, x5, #1 /* increment BE ctr */
- rev x7, x5
- ins v4.d[1], x7
- bcc .Lctrloop /* no overflow? */
+ bne .Lctrloop
+
+.Lctrout:
+ st1 {v4.16b}, [x5] /* return next CTR value */
+ FRAME_POP
+ ret
+
+.Lctrtailblock:
+ st1 {v0.16b}, [x0]
+ FRAME_POP
+ ret
+
+.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
ins v4.d[0], x7
- b .Lctrloop
-.Lctrhalfblock:
- ld1 {v3.8b}, [x1]
- eor v3.8b, v0.8b, v3.8b
- st1 {v3.8b}, [x0]
-.Lctrout:
- FRAME_POP
- ret
+ b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
.ltorg
@@ -531,3 +525,30 @@ AES_ENTRY(aes_xts_decrypt)
FRAME_POP
ret
AES_ENDPROC(aes_xts_decrypt)
+
+ /*
+ * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ * int blocks, u8 dg[], int enc_before, int enc_after)
+ */
+AES_ENTRY(aes_mac_update)
+ ld1 {v0.16b}, [x4] /* get dg */
+ enc_prepare w2, x1, x7
+ cbnz w5, .Lmacenc
+
+.Lmacloop:
+ cbz w3, .Lmacout
+ ld1 {v1.16b}, [x0], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
+
+ subs w3, w3, #1
+ csinv x5, x6, xzr, eq
+ cbz w5, .Lmacout
+
+.Lmacenc:
+ encrypt_block v0, w2, x1, x7, w8
+ b .Lmacloop
+
+.Lmacout:
+ st1 {v0.16b}, [x4] /* return dg */
+ ret
+AES_ENDPROC(aes_mac_update)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 85f07ead7c5c..f1e3aa2732f9 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -17,17 +17,25 @@
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
- add \out, \in, \in
+ shl \out, \in, #1
and \temp, \temp, \const
eor \out, \out, \temp
.endm
+ /* multiply by polynomial 'x^2' in GF(2^8) */
+ .macro mul_by_x2, out, in, temp, const
+ ushr \temp, \in, #6
+ shl \out, \in, #2
+ pmul \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
/* preload the entire Sbox */
.macro prepare, sbox, shiftrows, temp
adr \temp, \sbox
- movi v12.16b, #0x40
+ movi v12.16b, #0x1b
ldr q13, \shiftrows
- movi v14.16b, #0x1b
+ ldr q14, .Lror32by8
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
@@ -50,37 +58,31 @@
/* apply SubBytes transformation using the the preloaded Sbox */
.macro sub_bytes, in
- sub v9.16b, \in\().16b, v12.16b
+ sub v9.16b, \in\().16b, v15.16b
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
- sub v10.16b, v9.16b, v12.16b
+ sub v10.16b, v9.16b, v15.16b
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
- sub v11.16b, v10.16b, v12.16b
+ sub v11.16b, v10.16b, v15.16b
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
.endm
/* apply MixColumns transformation */
- .macro mix_columns, in
- mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
- rev32 v8.8h, \in\().8h
- eor \in\().16b, v10.16b, \in\().16b
- shl v9.4s, v8.4s, #24
- shl v11.4s, \in\().4s, #24
- sri v9.4s, v8.4s, #8
- sri v11.4s, \in\().4s, #8
- eor v9.16b, v9.16b, v8.16b
- eor v10.16b, v10.16b, v9.16b
- eor \in\().16b, v10.16b, v11.16b
- .endm
-
+ .macro mix_columns, in, enc
+ .if \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
- .macro inv_mix_columns, in
- mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
- mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
- eor \in\().16b, \in\().16b, v11.16b
- rev32 v11.8h, v11.8h
- eor \in\().16b, \in\().16b, v11.16b
- mix_columns \in
+ mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
+ eor \in\().16b, \in\().16b, v8.16b
+ rev32 v8.8h, v8.8h
+ eor \in\().16b, \in\().16b, v8.16b
+ .endif
+
+ mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
+ rev32 v8.8h, \in\().8h
+ eor v8.16b, v8.16b, v9.16b
+ eor \in\().16b, \in\().16b, v8.16b
+ tbl \in\().16b, {\in\().16b}, v14.16b
+ eor \in\().16b, \in\().16b, v8.16b
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
@@ -88,16 +90,13 @@
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ movi v15.16b, #0x40
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
- ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
+ ld1 {v15.4s}, [\rkp], #16
beq 2222f
- .if \enc == 1
- mix_columns \in
- .else
- inv_mix_columns \in
- .endif
+ mix_columns \in, \enc
b 1111b
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
.endm
@@ -116,139 +115,114 @@
*/
.macro sub_bytes_2x, in0, in1
- sub v8.16b, \in0\().16b, v12.16b
- sub v9.16b, \in1\().16b, v12.16b
+ sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, v8.16b, v12.16b
- sub v11.16b, v9.16b, v12.16b
+ sub v10.16b, v8.16b, v15.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ sub v11.16b, v9.16b, v15.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v10.16b, v12.16b
- sub v9.16b, v11.16b, v12.16b
+ sub v8.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm
.macro sub_bytes_4x, in0, in1, in2, in3
- sub v8.16b, \in0\().16b, v12.16b
+ sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
- sub v9.16b, \in1\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, \in2\().16b, v12.16b
+ sub v10.16b, \in2\().16b, v15.16b
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
- sub v11.16b, \in3\().16b, v12.16b
+ sub v11.16b, \in3\().16b, v15.16b
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v8.16b, v12.16b
+ sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
- sub v9.16b, v9.16b, v12.16b
+ sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
- sub v10.16b, v10.16b, v12.16b
+ sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
- sub v11.16b, v11.16b, v12.16b
+ sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
- sub v8.16b, v8.16b, v12.16b
+ sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
- sub v9.16b, v9.16b, v12.16b
+ sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
- sub v10.16b, v10.16b, v12.16b
+ sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
- sub v11.16b, v11.16b, v12.16b
+ sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
- sshr \tmp0\().16b, \in0\().16b, #7
- add \out0\().16b, \in0\().16b, \in0\().16b
- sshr \tmp1\().16b, \in1\().16b, #7
+ sshr \tmp0\().16b, \in0\().16b, #7
+ shl \out0\().16b, \in0\().16b, #1
+ sshr \tmp1\().16b, \in1\().16b, #7
and \tmp0\().16b, \tmp0\().16b, \const\().16b
- add \out1\().16b, \in1\().16b, \in1\().16b
+ shl \out1\().16b, \in1\().16b, #1
and \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
- .macro mix_columns_2x, in0, in1
- mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
- rev32 v10.8h, \in0\().8h
- rev32 v11.8h, \in1\().8h
- eor \in0\().16b, v8.16b, \in0\().16b
- eor \in1\().16b, v9.16b, \in1\().16b
- shl v12.4s, v10.4s, #24
- shl v13.4s, v11.4s, #24
- eor v8.16b, v8.16b, v10.16b
- sri v12.4s, v10.4s, #8
- shl v10.4s, \in0\().4s, #24
- eor v9.16b, v9.16b, v11.16b
- sri v13.4s, v11.4s, #8
- shl v11.4s, \in1\().4s, #24
- sri v10.4s, \in0\().4s, #8
- eor \in0\().16b, v8.16b, v12.16b
- sri v11.4s, \in1\().4s, #8
- eor \in1\().16b, v9.16b, v13.16b
- eor \in0\().16b, v10.16b, \in0\().16b
- eor \in1\().16b, v11.16b, \in1\().16b
+ .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
+ ushr \tmp0\().16b, \in0\().16b, #6
+ shl \out0\().16b, \in0\().16b, #2
+ ushr \tmp1\().16b, \in1\().16b, #6
+ pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
+ shl \out1\().16b, \in1\().16b, #2
+ pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
+ eor \out0\().16b, \out0\().16b, \tmp0\().16b
+ eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
- .macro inv_mix_cols_2x, in0, in1
- mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
- mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
+ .macro mix_columns_2x, in0, in1, enc
+ .if \enc == 0
+ /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+ mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
eor \in0\().16b, \in0\().16b, v8.16b
- eor \in1\().16b, \in1\().16b, v9.16b
rev32 v8.8h, v8.8h
- rev32 v9.8h, v9.8h
- eor \in0\().16b, \in0\().16b, v8.16b
- eor \in1\().16b, \in1\().16b, v9.16b
- mix_columns_2x \in0, \in1
- .endm
-
- .macro inv_mix_cols_4x, in0, in1, in2, in3
- mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
- mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
- mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
- mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
- eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
- eor \in2\().16b, \in2\().16b, v10.16b
- eor \in3\().16b, \in3\().16b, v11.16b
- rev32 v8.8h, v8.8h
rev32 v9.8h, v9.8h
- rev32 v10.8h, v10.8h
- rev32 v11.8h, v11.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
- eor \in2\().16b, \in2\().16b, v10.16b
- eor \in3\().16b, \in3\().16b, v11.16b
- mix_columns_2x \in0, \in1
- mix_columns_2x \in2, \in3
+ .endif
+
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
+ rev32 v10.8h, \in0\().8h
+ rev32 v11.8h, \in1\().8h
+ eor v10.16b, v10.16b, v8.16b
+ eor v11.16b, v11.16b, v9.16b
+ eor \in0\().16b, \in0\().16b, v10.16b
+ eor \in1\().16b, \in1\().16b, v11.16b
+ tbl \in0\().16b, {\in0\().16b}, v14.16b
+ tbl \in1\().16b, {\in1\().16b}, v14.16b
+ eor \in0\().16b, \in0\().16b, v10.16b
+ eor \in1\().16b, \in1\().16b, v11.16b
.endm
- .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
+ .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
- sub_bytes_2x \in0, \in1
+ movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.4s}, [\rkp], #16
+ sub_bytes_2x \in0, \in1
subs \i, \i, #1
+ ld1 {v15.4s}, [\rkp], #16
beq 2222f
- .if \enc == 1
- mix_columns_2x \in0, \in1
- ldr q13, .LForward_ShiftRows
- .else
- inv_mix_cols_2x \in0, \in1
- ldr q13, .LReverse_ShiftRows
- .endif
- movi v12.16b, #0x40
+ mix_columns_2x \in0, \in1, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
@@ -262,23 +236,17 @@
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
- sub_bytes_4x \in0, \in1, \in2, \in3
+ movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.4s}, [\rkp], #16
+ sub_bytes_4x \in0, \in1, \in2, \in3
subs \i, \i, #1
+ ld1 {v15.4s}, [\rkp], #16
beq 2222f
- .if \enc == 1
- mix_columns_2x \in0, \in1
- mix_columns_2x \in2, \in3
- ldr q13, .LForward_ShiftRows
- .else
- inv_mix_cols_4x \in0, \in1, \in2, \in3
- ldr q13, .LReverse_ShiftRows
- .endif
- movi v12.16b, #0x40
+ mix_columns_2x \in0, \in1, \enc
+ mix_columns_2x \in2, \in3, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
@@ -305,19 +273,7 @@
#include "aes-modes.S"
.text
- .align 4
-.LForward_ShiftRows:
-CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
-CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
-CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
-CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
-
-.LReverse_ShiftRows:
-CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
-CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
-CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
-CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
-
+ .align 6
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
@@ -385,3 +341,12 @@ CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+
+.LForward_ShiftRows:
+ .octa 0x0b06010c07020d08030e09040f0a0500
+
+.LReverse_ShiftRows:
+ .octa 0x0306090c0f0205080b0e0104070a0d00
+
+.Lror32by8:
+ .octa 0x0c0f0e0d080b0a090407060500030201
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
new file mode 100644
index 000000000000..ca0472500433
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -0,0 +1,972 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The algorithm implemented here is described in detail by the paper
+ * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
+ * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
+ *
+ * This implementation is based primarily on the OpenSSL implementation
+ * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ rounds .req x11
+ bskey .req x12
+
+ .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+ eor \b2, \b2, \b1
+ eor \b5, \b5, \b6
+ eor \b3, \b3, \b0
+ eor \b6, \b6, \b2
+ eor \b5, \b5, \b0
+ eor \b6, \b6, \b3
+ eor \b3, \b3, \b7
+ eor \b7, \b7, \b5
+ eor \b3, \b3, \b4
+ eor \b4, \b4, \b5
+ eor \b2, \b2, \b7
+ eor \b3, \b3, \b1
+ eor \b1, \b1, \b5
+ .endm
+
+ .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+ eor \b0, \b0, \b6
+ eor \b1, \b1, \b4
+ eor \b4, \b4, \b6
+ eor \b2, \b2, \b0
+ eor \b6, \b6, \b1
+ eor \b1, \b1, \b5
+ eor \b5, \b5, \b3
+ eor \b3, \b3, \b7
+ eor \b7, \b7, \b5
+ eor \b2, \b2, \b5
+ eor \b4, \b4, \b7
+ .endm
+
+ .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
+ eor \b1, \b1, \b7
+ eor \b4, \b4, \b7
+ eor \b7, \b7, \b5
+ eor \b1, \b1, \b3
+ eor \b2, \b2, \b5
+ eor \b3, \b3, \b7
+ eor \b6, \b6, \b1
+ eor \b2, \b2, \b0
+ eor \b5, \b5, \b3
+ eor \b4, \b4, \b6
+ eor \b0, \b0, \b6
+ eor \b1, \b1, \b4
+ .endm
+
+ .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
+ eor \b1, \b1, \b5
+ eor \b2, \b2, \b7
+ eor \b3, \b3, \b1
+ eor \b4, \b4, \b5
+ eor \b7, \b7, \b5
+ eor \b3, \b3, \b4
+ eor \b5, \b5, \b0
+ eor \b3, \b3, \b7
+ eor \b6, \b6, \b2
+ eor \b2, \b2, \b1
+ eor \b6, \b6, \b3
+ eor \b3, \b3, \b0
+ eor \b5, \b5, \b6
+ .endm
+
+ .macro mul_gf4, x0, x1, y0, y1, t0, t1
+ eor \t0, \y0, \y1
+ and \t0, \t0, \x0
+ eor \x0, \x0, \x1
+ and \t1, \x1, \y0
+ and \x0, \x0, \y1
+ eor \x1, \t1, \t0
+ eor \x0, \x0, \t1
+ .endm
+
+ .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
+ eor \t0, \y0, \y1
+ eor \t1, \y2, \y3
+ and \t0, \t0, \x0
+ and \t1, \t1, \x2
+ eor \x0, \x0, \x1
+ eor \x2, \x2, \x3
+ and \x1, \x1, \y0
+ and \x3, \x3, \y2
+ and \x0, \x0, \y1
+ and \x2, \x2, \y3
+ eor \x1, \x1, \x0
+ eor \x2, \x2, \x3
+ eor \x0, \x0, \t0
+ eor \x3, \x3, \t1
+ .endm
+
+ .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, t0, t1, t2, t3
+ eor \t0, \x0, \x2
+ eor \t1, \x1, \x3
+ mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
+ eor \y0, \y0, \y2
+ eor \y1, \y1, \y3
+ mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
+ eor \x0, \x0, \t0
+ eor \x2, \x2, \t0
+ eor \x1, \x1, \t1
+ eor \x3, \x3, \t1
+ eor \t0, \x4, \x6
+ eor \t1, \x5, \x7
+ mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
+ eor \y0, \y0, \y2
+ eor \y1, \y1, \y3
+ mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
+ eor \x4, \x4, \t0
+ eor \x6, \x6, \t0
+ eor \x5, \x5, \t1
+ eor \x7, \x7, \t1
+ .endm
+
+ .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ eor \t3, \x4, \x6
+ eor \t0, \x5, \x7
+ eor \t1, \x1, \x3
+ eor \s1, \x7, \x6
+ eor \s0, \x0, \x2
+ eor \s3, \t3, \t0
+ orr \t2, \t0, \t1
+ and \s2, \t3, \s0
+ orr \t3, \t3, \s0
+ eor \s0, \s0, \t1
+ and \t0, \t0, \t1
+ eor \t1, \x3, \x2
+ and \s3, \s3, \s0
+ and \s1, \s1, \t1
+ eor \t1, \x4, \x5
+ eor \s0, \x1, \x0
+ eor \t3, \t3, \s1
+ eor \t2, \t2, \s1
+ and \s1, \t1, \s0
+ orr \t1, \t1, \s0
+ eor \t3, \t3, \s3
+ eor \t0, \t0, \s1
+ eor \t2, \t2, \s2
+ eor \t1, \t1, \s3
+ eor \t0, \t0, \s2
+ and \s0, \x7, \x3
+ eor \t1, \t1, \s2
+ and \s1, \x6, \x2
+ and \s2, \x5, \x1
+ orr \s3, \x4, \x0
+ eor \t3, \t3, \s0
+ eor \t1, \t1, \s2
+ eor \s0, \t0, \s3
+ eor \t2, \t2, \s1
+ and \s2, \t3, \t1
+ eor \s1, \t2, \s2
+ eor \s3, \s0, \s2
+ bsl \s1, \t1, \s0
+ not \t0, \s0
+ bsl \s0, \s1, \s3
+ bsl \t0, \s1, \s3
+ bsl \s3, \t3, \t2
+ eor \t3, \t3, \t2
+ and \s2, \s0, \s3
+ eor \t1, \t1, \t0
+ eor \s2, \s2, \t3
+ mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+ \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+ .endm
+
+ .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+ \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+ inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
+ \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+ \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+ \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+ out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+ \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
+ .endm
+
+ .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+ \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+ inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
+ \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+ \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+ \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+ inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+ \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
+ .endm
+
+ .macro enc_next_rk
+ ldp q16, q17, [bskey], #128
+ ldp q18, q19, [bskey, #-96]
+ ldp q20, q21, [bskey, #-64]
+ ldp q22, q23, [bskey, #-32]
+ .endm
+
+ .macro dec_next_rk
+ ldp q16, q17, [bskey, #-128]!
+ ldp q18, q19, [bskey, #32]
+ ldp q20, q21, [bskey, #64]
+ ldp q22, q23, [bskey, #96]
+ .endm
+
+ .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
+ eor \x0\().16b, \x0\().16b, v16.16b
+ eor \x1\().16b, \x1\().16b, v17.16b
+ eor \x2\().16b, \x2\().16b, v18.16b
+ eor \x3\().16b, \x3\().16b, v19.16b
+ eor \x4\().16b, \x4\().16b, v20.16b
+ eor \x5\().16b, \x5\().16b, v21.16b
+ eor \x6\().16b, \x6\().16b, v22.16b
+ eor \x7\().16b, \x7\().16b, v23.16b
+ .endm
+
+ .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
+ tbl \x0\().16b, {\x0\().16b}, \mask\().16b
+ tbl \x1\().16b, {\x1\().16b}, \mask\().16b
+ tbl \x2\().16b, {\x2\().16b}, \mask\().16b
+ tbl \x3\().16b, {\x3\().16b}, \mask\().16b
+ tbl \x4\().16b, {\x4\().16b}, \mask\().16b
+ tbl \x5\().16b, {\x5\().16b}, \mask\().16b
+ tbl \x6\().16b, {\x6\().16b}, \mask\().16b
+ tbl \x7\().16b, {\x7\().16b}, \mask\().16b
+ .endm
+
+ .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, t4, t5, t6, t7, inv
+ ext \t0\().16b, \x0\().16b, \x0\().16b, #12
+ ext \t1\().16b, \x1\().16b, \x1\().16b, #12
+ eor \x0\().16b, \x0\().16b, \t0\().16b
+ ext \t2\().16b, \x2\().16b, \x2\().16b, #12
+ eor \x1\().16b, \x1\().16b, \t1\().16b
+ ext \t3\().16b, \x3\().16b, \x3\().16b, #12
+ eor \x2\().16b, \x2\().16b, \t2\().16b
+ ext \t4\().16b, \x4\().16b, \x4\().16b, #12
+ eor \x3\().16b, \x3\().16b, \t3\().16b
+ ext \t5\().16b, \x5\().16b, \x5\().16b, #12
+ eor \x4\().16b, \x4\().16b, \t4\().16b
+ ext \t6\().16b, \x6\().16b, \x6\().16b, #12
+ eor \x5\().16b, \x5\().16b, \t5\().16b
+ ext \t7\().16b, \x7\().16b, \x7\().16b, #12
+ eor \x6\().16b, \x6\().16b, \t6\().16b
+ eor \t1\().16b, \t1\().16b, \x0\().16b
+ eor \x7\().16b, \x7\().16b, \t7\().16b
+ ext \x0\().16b, \x0\().16b, \x0\().16b, #8
+ eor \t2\().16b, \t2\().16b, \x1\().16b
+ eor \t0\().16b, \t0\().16b, \x7\().16b
+ eor \t1\().16b, \t1\().16b, \x7\().16b
+ ext \x1\().16b, \x1\().16b, \x1\().16b, #8
+ eor \t5\().16b, \t5\().16b, \x4\().16b
+ eor \x0\().16b, \x0\().16b, \t0\().16b
+ eor \t6\().16b, \t6\().16b, \x5\().16b
+ eor \x1\().16b, \x1\().16b, \t1\().16b
+ ext \t0\().16b, \x4\().16b, \x4\().16b, #8
+ eor \t4\().16b, \t4\().16b, \x3\().16b
+ ext \t1\().16b, \x5\().16b, \x5\().16b, #8
+ eor \t7\().16b, \t7\().16b, \x6\().16b
+ ext \x4\().16b, \x3\().16b, \x3\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x2\().16b
+ ext \x5\().16b, \x7\().16b, \x7\().16b, #8
+ eor \t4\().16b, \t4\().16b, \x7\().16b
+ ext \x3\().16b, \x6\().16b, \x6\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x7\().16b
+ ext \x6\().16b, \x2\().16b, \x2\().16b, #8
+ eor \x7\().16b, \t1\().16b, \t5\().16b
+ .ifb \inv
+ eor \x2\().16b, \t0\().16b, \t4\().16b
+ eor \x4\().16b, \x4\().16b, \t3\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x3\().16b, \x3\().16b, \t6\().16b
+ eor \x6\().16b, \x6\().16b, \t2\().16b
+ .else
+ eor \t3\().16b, \t3\().16b, \x4\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x2\().16b, \x3\().16b, \t6\().16b
+ eor \x3\().16b, \t0\().16b, \t4\().16b
+ eor \x4\().16b, \x6\().16b, \t2\().16b
+ mov \x6\().16b, \t3\().16b
+ .endif
+ .endm
+
+ .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, t4, t5, t6, t7
+ ext \t0\().16b, \x0\().16b, \x0\().16b, #8
+ ext \t6\().16b, \x6\().16b, \x6\().16b, #8
+ ext \t7\().16b, \x7\().16b, \x7\().16b, #8
+ eor \t0\().16b, \t0\().16b, \x0\().16b
+ ext \t1\().16b, \x1\().16b, \x1\().16b, #8
+ eor \t6\().16b, \t6\().16b, \x6\().16b
+ ext \t2\().16b, \x2\().16b, \x2\().16b, #8
+ eor \t7\().16b, \t7\().16b, \x7\().16b
+ ext \t3\().16b, \x3\().16b, \x3\().16b, #8
+ eor \t1\().16b, \t1\().16b, \x1\().16b
+ ext \t4\().16b, \x4\().16b, \x4\().16b, #8
+ eor \t2\().16b, \t2\().16b, \x2\().16b
+ ext \t5\().16b, \x5\().16b, \x5\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x3\().16b
+ eor \t4\().16b, \t4\().16b, \x4\().16b
+ eor \t5\().16b, \t5\().16b, \x5\().16b
+ eor \x0\().16b, \x0\().16b, \t6\().16b
+ eor \x1\().16b, \x1\().16b, \t6\().16b
+ eor \x2\().16b, \x2\().16b, \t0\().16b
+ eor \x4\().16b, \x4\().16b, \t2\().16b
+ eor \x3\().16b, \x3\().16b, \t1\().16b
+ eor \x1\().16b, \x1\().16b, \t7\().16b
+ eor \x2\().16b, \x2\().16b, \t7\().16b
+ eor \x4\().16b, \x4\().16b, \t6\().16b
+ eor \x5\().16b, \x5\().16b, \t3\().16b
+ eor \x3\().16b, \x3\().16b, \t6\().16b
+ eor \x6\().16b, \x6\().16b, \t4\().16b
+ eor \x4\().16b, \x4\().16b, \t7\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x7\().16b, \x7\().16b, \t5\().16b
+ mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+ \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
+ .endm
+
+ .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
+ ushr \t0\().2d, \b0\().2d, #\n
+ ushr \t1\().2d, \b1\().2d, #\n
+ eor \t0\().16b, \t0\().16b, \a0\().16b
+ eor \t1\().16b, \t1\().16b, \a1\().16b
+ and \t0\().16b, \t0\().16b, \mask\().16b
+ and \t1\().16b, \t1\().16b, \mask\().16b
+ eor \a0\().16b, \a0\().16b, \t0\().16b
+ shl \t0\().2d, \t0\().2d, #\n
+ eor \a1\().16b, \a1\().16b, \t1\().16b
+ shl \t1\().2d, \t1\().2d, #\n
+ eor \b0\().16b, \b0\().16b, \t0\().16b
+ eor \b1\().16b, \b1\().16b, \t1\().16b
+ .endm
+
+ .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
+ movi \t0\().16b, #0x55
+ movi \t1\().16b, #0x33
+ swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
+ swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
+ movi \t0\().16b, #0x0f
+ swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
+ swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
+ swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
+ swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
+ .endm
+
+
+ .align 6
+M0: .octa 0x0004080c0105090d02060a0e03070b0f
+
+M0SR: .octa 0x0004080c05090d010a0e02060f03070b
+SR: .octa 0x0f0e0d0c0a09080b0504070600030201
+SRM0: .octa 0x01060b0c0207080d0304090e00050a0f
+
+M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
+ISR: .octa 0x0f0e0d0c080b0a090504070602010003
+ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
+
+ /*
+ * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
+ */
+ENTRY(aesbs_convert_key)
+ ld1 {v7.4s}, [x1], #16 // load round 0 key
+ ld1 {v17.4s}, [x1], #16 // load round 1 key
+
+ movi v8.16b, #0x01 // bit masks
+ movi v9.16b, #0x02
+ movi v10.16b, #0x04
+ movi v11.16b, #0x08
+ movi v12.16b, #0x10
+ movi v13.16b, #0x20
+ movi v14.16b, #0x40
+ movi v15.16b, #0x80
+ ldr q16, M0
+
+ sub x2, x2, #1
+ str q7, [x0], #16 // save round 0 key
+
+.Lkey_loop:
+ tbl v7.16b ,{v17.16b}, v16.16b
+ ld1 {v17.4s}, [x1], #16 // load next round key
+
+ cmtst v0.16b, v7.16b, v8.16b
+ cmtst v1.16b, v7.16b, v9.16b
+ cmtst v2.16b, v7.16b, v10.16b
+ cmtst v3.16b, v7.16b, v11.16b
+ cmtst v4.16b, v7.16b, v12.16b
+ cmtst v5.16b, v7.16b, v13.16b
+ cmtst v6.16b, v7.16b, v14.16b
+ cmtst v7.16b, v7.16b, v15.16b
+ not v0.16b, v0.16b
+ not v1.16b, v1.16b
+ not v5.16b, v5.16b
+ not v6.16b, v6.16b
+
+ subs x2, x2, #1
+ stp q0, q1, [x0], #128
+ stp q2, q3, [x0, #-96]
+ stp q4, q5, [x0, #-64]
+ stp q6, q7, [x0, #-32]
+ b.ne .Lkey_loop
+
+ movi v7.16b, #0x63 // compose .L63
+ eor v17.16b, v17.16b, v7.16b
+ str q17, [x0]
+ ret
+ENDPROC(aesbs_convert_key)
+
+ .align 4
+aesbs_encrypt8:
+ ldr q9, [bskey], #16 // round 0 key
+ ldr q8, M0SR
+ ldr q24, SR
+
+ eor v10.16b, v0.16b, v9.16b // xor with round0 key
+ eor v11.16b, v1.16b, v9.16b
+ tbl v0.16b, {v10.16b}, v8.16b
+ eor v12.16b, v2.16b, v9.16b
+ tbl v1.16b, {v11.16b}, v8.16b
+ eor v13.16b, v3.16b, v9.16b
+ tbl v2.16b, {v12.16b}, v8.16b
+ eor v14.16b, v4.16b, v9.16b
+ tbl v3.16b, {v13.16b}, v8.16b
+ eor v15.16b, v5.16b, v9.16b
+ tbl v4.16b, {v14.16b}, v8.16b
+ eor v10.16b, v6.16b, v9.16b
+ tbl v5.16b, {v15.16b}, v8.16b
+ eor v11.16b, v7.16b, v9.16b
+ tbl v6.16b, {v10.16b}, v8.16b
+ tbl v7.16b, {v11.16b}, v8.16b
+
+ bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+ sub rounds, rounds, #1
+ b .Lenc_sbox
+
+.Lenc_loop:
+ shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Lenc_sbox:
+ sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+ subs rounds, rounds, #1
+ b.cc .Lenc_done
+
+ enc_next_rk
+
+ mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+
+ add_round_key v0, v1, v2, v3, v4, v5, v6, v7
+
+ b.ne .Lenc_loop
+ ldr q24, SRM0
+ b .Lenc_loop
+
+.Lenc_done:
+ ldr q12, [bskey] // last round key
+
+ bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
+
+ eor v0.16b, v0.16b, v12.16b
+ eor v1.16b, v1.16b, v12.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v6.16b, v6.16b, v12.16b
+ eor v3.16b, v3.16b, v12.16b
+ eor v7.16b, v7.16b, v12.16b
+ eor v2.16b, v2.16b, v12.16b
+ eor v5.16b, v5.16b, v12.16b
+ ret
+ENDPROC(aesbs_encrypt8)
+
+ .align 4
+aesbs_decrypt8:
+ lsl x9, rounds, #7
+ add bskey, bskey, x9
+
+ ldr q9, [bskey, #-112]! // round 0 key
+ ldr q8, M0ISR
+ ldr q24, ISR
+
+ eor v10.16b, v0.16b, v9.16b // xor with round0 key
+ eor v11.16b, v1.16b, v9.16b
+ tbl v0.16b, {v10.16b}, v8.16b
+ eor v12.16b, v2.16b, v9.16b
+ tbl v1.16b, {v11.16b}, v8.16b
+ eor v13.16b, v3.16b, v9.16b
+ tbl v2.16b, {v12.16b}, v8.16b
+ eor v14.16b, v4.16b, v9.16b
+ tbl v3.16b, {v13.16b}, v8.16b
+ eor v15.16b, v5.16b, v9.16b
+ tbl v4.16b, {v14.16b}, v8.16b
+ eor v10.16b, v6.16b, v9.16b
+ tbl v5.16b, {v15.16b}, v8.16b
+ eor v11.16b, v7.16b, v9.16b
+ tbl v6.16b, {v10.16b}, v8.16b
+ tbl v7.16b, {v11.16b}, v8.16b
+
+ bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+ sub rounds, rounds, #1
+ b .Ldec_sbox
+
+.Ldec_loop:
+ shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Ldec_sbox:
+ inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+ subs rounds, rounds, #1
+ b.cc .Ldec_done
+
+ dec_next_rk
+
+ add_round_key v0, v1, v6, v4, v2, v7, v3, v5
+
+ inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+
+ b.ne .Ldec_loop
+ ldr q24, ISRM0
+ b .Ldec_loop
+.Ldec_done:
+ ldr q12, [bskey, #-16] // last round key
+
+ bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
+
+ eor v0.16b, v0.16b, v12.16b
+ eor v1.16b, v1.16b, v12.16b
+ eor v6.16b, v6.16b, v12.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v2.16b, v2.16b, v12.16b
+ eor v7.16b, v7.16b, v12.16b
+ eor v3.16b, v3.16b, v12.16b
+ eor v5.16b, v5.16b, v12.16b
+ ret
+ENDPROC(aesbs_decrypt8)
+
+ /*
+ * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ */
+ .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+99: mov x5, #1
+ lsl x5, x5, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x5, x5, xzr, mi
+
+ ld1 {v0.16b}, [x1], #16
+ tbnz x5, #1, 0f
+ ld1 {v1.16b}, [x1], #16
+ tbnz x5, #2, 0f
+ ld1 {v2.16b}, [x1], #16
+ tbnz x5, #3, 0f
+ ld1 {v3.16b}, [x1], #16
+ tbnz x5, #4, 0f
+ ld1 {v4.16b}, [x1], #16
+ tbnz x5, #5, 0f
+ ld1 {v5.16b}, [x1], #16
+ tbnz x5, #6, 0f
+ ld1 {v6.16b}, [x1], #16
+ tbnz x5, #7, 0f
+ ld1 {v7.16b}, [x1], #16
+
+0: mov bskey, x2
+ mov rounds, x3
+ bl \do8
+
+ st1 {\o0\().16b}, [x0], #16
+ tbnz x5, #1, 1f
+ st1 {\o1\().16b}, [x0], #16
+ tbnz x5, #2, 1f
+ st1 {\o2\().16b}, [x0], #16
+ tbnz x5, #3, 1f
+ st1 {\o3\().16b}, [x0], #16
+ tbnz x5, #4, 1f
+ st1 {\o4\().16b}, [x0], #16
+ tbnz x5, #5, 1f
+ st1 {\o5\().16b}, [x0], #16
+ tbnz x5, #6, 1f
+ st1 {\o6\().16b}, [x0], #16
+ tbnz x5, #7, 1f
+ st1 {\o7\().16b}, [x0], #16
+
+ cbnz x4, 99b
+
+1: ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .align 4
+ENTRY(aesbs_ecb_encrypt)
+ __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_ecb_encrypt)
+
+ .align 4
+ENTRY(aesbs_ecb_decrypt)
+ __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_ecb_decrypt)
+
+ /*
+ * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ */
+ .align 4
+ENTRY(aesbs_cbc_decrypt)
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+99: mov x6, #1
+ lsl x6, x6, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x6, x6, xzr, mi
+
+ ld1 {v0.16b}, [x1], #16
+ mov v25.16b, v0.16b
+ tbnz x6, #1, 0f
+ ld1 {v1.16b}, [x1], #16
+ mov v26.16b, v1.16b
+ tbnz x6, #2, 0f
+ ld1 {v2.16b}, [x1], #16
+ mov v27.16b, v2.16b
+ tbnz x6, #3, 0f
+ ld1 {v3.16b}, [x1], #16
+ mov v28.16b, v3.16b
+ tbnz x6, #4, 0f
+ ld1 {v4.16b}, [x1], #16
+ mov v29.16b, v4.16b
+ tbnz x6, #5, 0f
+ ld1 {v5.16b}, [x1], #16
+ mov v30.16b, v5.16b
+ tbnz x6, #6, 0f
+ ld1 {v6.16b}, [x1], #16
+ mov v31.16b, v6.16b
+ tbnz x6, #7, 0f
+ ld1 {v7.16b}, [x1]
+
+0: mov bskey, x2
+ mov rounds, x3
+ bl aesbs_decrypt8
+
+ ld1 {v24.16b}, [x5] // load IV
+
+ eor v1.16b, v1.16b, v25.16b
+ eor v6.16b, v6.16b, v26.16b
+ eor v4.16b, v4.16b, v27.16b
+ eor v2.16b, v2.16b, v28.16b
+ eor v7.16b, v7.16b, v29.16b
+ eor v0.16b, v0.16b, v24.16b
+ eor v3.16b, v3.16b, v30.16b
+ eor v5.16b, v5.16b, v31.16b
+
+ st1 {v0.16b}, [x0], #16
+ mov v24.16b, v25.16b
+ tbnz x6, #1, 1f
+ st1 {v1.16b}, [x0], #16
+ mov v24.16b, v26.16b
+ tbnz x6, #2, 1f
+ st1 {v6.16b}, [x0], #16
+ mov v24.16b, v27.16b
+ tbnz x6, #3, 1f
+ st1 {v4.16b}, [x0], #16
+ mov v24.16b, v28.16b
+ tbnz x6, #4, 1f
+ st1 {v2.16b}, [x0], #16
+ mov v24.16b, v29.16b
+ tbnz x6, #5, 1f
+ st1 {v7.16b}, [x0], #16
+ mov v24.16b, v30.16b
+ tbnz x6, #6, 1f
+ st1 {v3.16b}, [x0], #16
+ mov v24.16b, v31.16b
+ tbnz x6, #7, 1f
+ ld1 {v24.16b}, [x1], #16
+ st1 {v5.16b}, [x0], #16
+1: st1 {v24.16b}, [x5] // store IV
+
+ cbnz x4, 99b
+
+ ldp x29, x30, [sp], #16
+ ret
+ENDPROC(aesbs_cbc_decrypt)
+
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+ .align 4
+.Lxts_mul_x:
+CPU_LE( .quad 1, 0x87 )
+CPU_BE( .quad 0x87, 1 )
+
+ /*
+ * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ */
+__xts_crypt8:
+ mov x6, #1
+ lsl x6, x6, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x6, x6, xzr, mi
+
+ ld1 {v0.16b}, [x1], #16
+ next_tweak v26, v25, v30, v31
+ eor v0.16b, v0.16b, v25.16b
+ tbnz x6, #1, 0f
+
+ ld1 {v1.16b}, [x1], #16
+ next_tweak v27, v26, v30, v31
+ eor v1.16b, v1.16b, v26.16b
+ tbnz x6, #2, 0f
+
+ ld1 {v2.16b}, [x1], #16
+ next_tweak v28, v27, v30, v31
+ eor v2.16b, v2.16b, v27.16b
+ tbnz x6, #3, 0f
+
+ ld1 {v3.16b}, [x1], #16
+ next_tweak v29, v28, v30, v31
+ eor v3.16b, v3.16b, v28.16b
+ tbnz x6, #4, 0f
+
+ ld1 {v4.16b}, [x1], #16
+ str q29, [sp, #16]
+ eor v4.16b, v4.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #5, 0f
+
+ ld1 {v5.16b}, [x1], #16
+ str q29, [sp, #32]
+ eor v5.16b, v5.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #6, 0f
+
+ ld1 {v6.16b}, [x1], #16
+ str q29, [sp, #48]
+ eor v6.16b, v6.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #7, 0f
+
+ ld1 {v7.16b}, [x1], #16
+ str q29, [sp, #64]
+ eor v7.16b, v7.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+
+0: mov bskey, x2
+ mov rounds, x3
+ br x7
+ENDPROC(__xts_crypt8)
+
+ .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+ stp x29, x30, [sp, #-80]!
+ mov x29, sp
+
+ ldr q30, .Lxts_mul_x
+ ld1 {v25.16b}, [x5]
+
+99: adr x7, \do8
+ bl __xts_crypt8
+
+ ldp q16, q17, [sp, #16]
+ ldp q18, q19, [sp, #48]
+
+ eor \o0\().16b, \o0\().16b, v25.16b
+ eor \o1\().16b, \o1\().16b, v26.16b
+ eor \o2\().16b, \o2\().16b, v27.16b
+ eor \o3\().16b, \o3\().16b, v28.16b
+
+ st1 {\o0\().16b}, [x0], #16
+ mov v25.16b, v26.16b
+ tbnz x6, #1, 1f
+ st1 {\o1\().16b}, [x0], #16
+ mov v25.16b, v27.16b
+ tbnz x6, #2, 1f
+ st1 {\o2\().16b}, [x0], #16
+ mov v25.16b, v28.16b
+ tbnz x6, #3, 1f
+ st1 {\o3\().16b}, [x0], #16
+ mov v25.16b, v29.16b
+ tbnz x6, #4, 1f
+
+ eor \o4\().16b, \o4\().16b, v16.16b
+ eor \o5\().16b, \o5\().16b, v17.16b
+ eor \o6\().16b, \o6\().16b, v18.16b
+ eor \o7\().16b, \o7\().16b, v19.16b
+
+ st1 {\o4\().16b}, [x0], #16
+ tbnz x6, #5, 1f
+ st1 {\o5\().16b}, [x0], #16
+ tbnz x6, #6, 1f
+ st1 {\o6\().16b}, [x0], #16
+ tbnz x6, #7, 1f
+ st1 {\o7\().16b}, [x0], #16
+
+ cbnz x4, 99b
+
+1: st1 {v25.16b}, [x5]
+ ldp x29, x30, [sp], #80
+ ret
+ .endm
+
+ENTRY(aesbs_xts_encrypt)
+ __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_xts_encrypt)
+
+ENTRY(aesbs_xts_decrypt)
+ __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_xts_decrypt)
+
+ .macro next_ctr, v
+ mov \v\().d[1], x8
+ adds x8, x8, #1
+ mov \v\().d[0], x7
+ adc x7, x7, xzr
+ rev64 \v\().16b, \v\().16b
+ .endm
+
+ /*
+ * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ * int rounds, int blocks, u8 iv[], u8 final[])
+ */
+ENTRY(aesbs_ctr_encrypt)
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ cmp x6, #0
+ cset x10, ne
+ add x4, x4, x10 // do one extra block if final
+
+ ldp x7, x8, [x5]
+ ld1 {v0.16b}, [x5]
+CPU_LE( rev x7, x7 )
+CPU_LE( rev x8, x8 )
+ adds x8, x8, #1
+ adc x7, x7, xzr
+
+99: mov x9, #1
+ lsl x9, x9, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x9, x9, xzr, le
+
+ tbnz x9, #1, 0f
+ next_ctr v1
+ tbnz x9, #2, 0f
+ next_ctr v2
+ tbnz x9, #3, 0f
+ next_ctr v3
+ tbnz x9, #4, 0f
+ next_ctr v4
+ tbnz x9, #5, 0f
+ next_ctr v5
+ tbnz x9, #6, 0f
+ next_ctr v6
+ tbnz x9, #7, 0f
+ next_ctr v7
+
+0: mov bskey, x2
+ mov rounds, x3
+ bl aesbs_encrypt8
+
+ lsr x9, x9, x10 // disregard the extra block
+ tbnz x9, #0, 0f
+
+ ld1 {v8.16b}, [x1], #16
+ eor v0.16b, v0.16b, v8.16b
+ st1 {v0.16b}, [x0], #16
+ tbnz x9, #1, 1f
+
+ ld1 {v9.16b}, [x1], #16
+ eor v1.16b, v1.16b, v9.16b
+ st1 {v1.16b}, [x0], #16
+ tbnz x9, #2, 2f
+
+ ld1 {v10.16b}, [x1], #16
+ eor v4.16b, v4.16b, v10.16b
+ st1 {v4.16b}, [x0], #16
+ tbnz x9, #3, 3f
+
+ ld1 {v11.16b}, [x1], #16
+ eor v6.16b, v6.16b, v11.16b
+ st1 {v6.16b}, [x0], #16
+ tbnz x9, #4, 4f
+
+ ld1 {v12.16b}, [x1], #16
+ eor v3.16b, v3.16b, v12.16b
+ st1 {v3.16b}, [x0], #16
+ tbnz x9, #5, 5f
+
+ ld1 {v13.16b}, [x1], #16
+ eor v7.16b, v7.16b, v13.16b
+ st1 {v7.16b}, [x0], #16
+ tbnz x9, #6, 6f
+
+ ld1 {v14.16b}, [x1], #16
+ eor v2.16b, v2.16b, v14.16b
+ st1 {v2.16b}, [x0], #16
+ tbnz x9, #7, 7f
+
+ ld1 {v15.16b}, [x1], #16
+ eor v5.16b, v5.16b, v15.16b
+ st1 {v5.16b}, [x0], #16
+
+8: next_ctr v0
+ cbnz x4, 99b
+
+0: st1 {v0.16b}, [x5]
+ ldp x29, x30, [sp], #16
+ ret
+
+ /*
+ * If we are handling the tail of the input (x6 != NULL), return the
+ * final keystream block back to the caller.
+ */
+1: cbz x6, 8b
+ st1 {v1.16b}, [x6]
+ b 8b
+2: cbz x6, 8b
+ st1 {v4.16b}, [x6]
+ b 8b
+3: cbz x6, 8b
+ st1 {v6.16b}, [x6]
+ b 8b
+4: cbz x6, 8b
+ st1 {v3.16b}, [x6]
+ b 8b
+5: cbz x6, 8b
+ st1 {v7.16b}, [x6]
+ b 8b
+6: cbz x6, 8b
+ st1 {v2.16b}, [x6]
+ b 8b
+7: cbz x6, 8b
+ st1 {v5.16b}, [x6]
+ b 8b
+ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
new file mode 100644
index 000000000000..db2501d93550
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -0,0 +1,439 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+
+asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
+
+asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+
+asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+
+asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], u8 final[]);
+
+asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+
+/* borrowed from aes-neon-blk.ko */
+asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int blocks, int first);
+asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int blocks, u8 iv[],
+ int first);
+
+struct aesbs_ctx {
+ u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
+ int rounds;
+} __aligned(AES_BLOCK_SIZE);
+
+struct aesbs_cbc_ctx {
+ struct aesbs_ctx key;
+ u32 enc[AES_MAX_KEYLENGTH_U32];
+};
+
+struct aesbs_xts_ctx {
+ struct aesbs_ctx key;
+ u32 twkey[AES_MAX_KEYLENGTH_U32];
+};
+
+static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_aes_ctx rk;
+ int err;
+
+ err = crypto_aes_expand_key(&rk, in_key, key_len);
+ if (err)
+ return err;
+
+ ctx->rounds = 6 + key_len / 4;
+
+ kernel_neon_begin();
+ aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int __ecb_crypt(struct skcipher_request *req,
+ void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks))
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ if (walk.nbytes < walk.total)
+ blocks = round_down(blocks,
+ walk.stride / AES_BLOCK_SIZE);
+
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
+ ctx->rounds, blocks);
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
+{
+ return __ecb_crypt(req, aesbs_ecb_encrypt);
+}
+
+static int ecb_decrypt(struct skcipher_request *req)
+{
+ return __ecb_crypt(req, aesbs_ecb_decrypt);
+}
+
+static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_aes_ctx rk;
+ int err;
+
+ err = crypto_aes_expand_key(&rk, in_key, key_len);
+ if (err)
+ return err;
+
+ ctx->key.rounds = 6 + key_len / 4;
+
+ memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
+
+ kernel_neon_begin();
+ aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err, first = 1;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ /* fall back to the non-bitsliced NEON implementation */
+ neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->enc, ctx->key.rounds, blocks, walk.iv,
+ first);
+ err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ first = 0;
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ if (walk.nbytes < walk.total)
+ blocks = round_down(blocks,
+ walk.stride / AES_BLOCK_SIZE);
+
+ aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int ctr_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes > 0) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
+
+ if (walk.nbytes < walk.total) {
+ blocks = round_down(blocks,
+ walk.stride / AES_BLOCK_SIZE);
+ final = NULL;
+ }
+
+ aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->rk, ctx->rounds, blocks, walk.iv, final);
+
+ if (final) {
+ u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+
+ if (dst != src)
+ memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
+ crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+
+ err = skcipher_walk_done(&walk, 0);
+ break;
+ }
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_aes_ctx rk;
+ int err;
+
+ err = xts_verify_key(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ key_len /= 2;
+ err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
+ if (err)
+ return err;
+
+ memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
+
+ return aesbs_setkey(tfm, in_key, key_len);
+}
+
+static int __xts_crypt(struct skcipher_request *req,
+ void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]))
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+
+ neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
+ ctx->key.rounds, 1, 1);
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ if (walk.nbytes < walk.total)
+ blocks = round_down(blocks,
+ walk.stride / AES_BLOCK_SIZE);
+
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
+ ctx->key.rounds, blocks, walk.iv);
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct skcipher_request *req)
+{
+ return __xts_crypt(req, aesbs_xts_encrypt);
+}
+
+static int xts_decrypt(struct skcipher_request *req)
+{
+ return __xts_crypt(req, aesbs_xts_decrypt);
+}
+
+static struct skcipher_alg aes_algs[] = { {
+ .base.cra_name = "__ecb(aes)",
+ .base.cra_driver_name = "__ecb-aes-neonbs",
+ .base.cra_priority = 250,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE,
+ .setkey = aesbs_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+}, {
+ .base.cra_name = "__cbc(aes)",
+ .base.cra_driver_name = "__cbc-aes-neonbs",
+ .base.cra_priority = 250,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_cbc_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+}, {
+ .base.cra_name = "__ctr(aes)",
+ .base.cra_driver_name = "__ctr-aes-neonbs",
+ .base.cra_priority = 250,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+}, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-neonbs",
+ .base.cra_priority = 250 - 1,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+}, {
+ .base.cra_name = "__xts(aes)",
+ .base.cra_driver_name = "__xts-aes-neonbs",
+ .base.cra_priority = 250,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .walksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_xts_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+} };
+
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
+ if (aes_simd_algs[i])
+ simd_skcipher_free(aes_simd_algs[i]);
+
+ crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static int __init aes_init(void)
+{
+ struct simd_skcipher_alg *simd;
+ const char *basename;
+ const char *algname;
+ const char *drvname;
+ int err;
+ int i;
+
+ if (!(elf_hwcap & HWCAP_ASIMD))
+ return -ENODEV;
+
+ err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+ if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
+ continue;
+
+ algname = aes_algs[i].base.cra_name + 2;
+ drvname = aes_algs[i].base.cra_driver_name + 2;
+ basename = aes_algs[i].base.cra_driver_name;
+ simd = simd_skcipher_create_compat(algname, drvname, basename);
+ err = PTR_ERR(simd);
+ if (IS_ERR(simd))
+ goto unregister_simds;
+
+ aes_simd_algs[i] = simd;
+ }
+ return 0;
+
+unregister_simds:
+ aes_exit();
+ return err;
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
new file mode 100644
index 000000000000..13c85e272c2a
--- /dev/null
+++ b/arch/arm64/crypto/chacha20-neon-core.S
@@ -0,0 +1,450 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .align 6
+
+ENTRY(chacha20_block_xor_neon)
+ // x0: Input state matrix, s
+ // x1: 1 data block output, o
+ // x2: 1 data block input, i
+
+ //
+ // This function encrypts one ChaCha20 block by loading the state matrix
+ // in four NEON registers. It performs matrix operation on four words in
+ // parallel, but requires shuffling to rearrange the words after each
+ // round.
+ //
+
+ // x0..3 = s0..3
+ adr x3, ROT8
+ ld1 {v0.4s-v3.4s}, [x0]
+ ld1 {v8.4s-v11.4s}, [x0]
+ ld1 {v12.4s}, [x3]
+
+ mov x3, #10
+
+.Ldoubleround:
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ rev32 v3.8h, v3.8h
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #12
+ sri v1.4s, v4.4s, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ tbl v3.16b, {v3.16b}, v12.16b
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #7
+ sri v1.4s, v4.4s, #25
+
+ // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ ext v1.16b, v1.16b, v1.16b, #4
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ ext v2.16b, v2.16b, v2.16b, #8
+ // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ ext v3.16b, v3.16b, v3.16b, #12
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ rev32 v3.8h, v3.8h
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #12
+ sri v1.4s, v4.4s, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ tbl v3.16b, {v3.16b}, v12.16b
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #7
+ sri v1.4s, v4.4s, #25
+
+ // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ ext v1.16b, v1.16b, v1.16b, #12
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ ext v2.16b, v2.16b, v2.16b, #8
+ // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ ext v3.16b, v3.16b, v3.16b, #4
+
+ subs x3, x3, #1
+ b.ne .Ldoubleround
+
+ ld1 {v4.16b-v7.16b}, [x2]
+
+ // o0 = i0 ^ (x0 + s0)
+ add v0.4s, v0.4s, v8.4s
+ eor v0.16b, v0.16b, v4.16b
+
+ // o1 = i1 ^ (x1 + s1)
+ add v1.4s, v1.4s, v9.4s
+ eor v1.16b, v1.16b, v5.16b
+
+ // o2 = i2 ^ (x2 + s2)
+ add v2.4s, v2.4s, v10.4s
+ eor v2.16b, v2.16b, v6.16b
+
+ // o3 = i3 ^ (x3 + s3)
+ add v3.4s, v3.4s, v11.4s
+ eor v3.16b, v3.16b, v7.16b
+
+ st1 {v0.16b-v3.16b}, [x1]
+
+ ret
+ENDPROC(chacha20_block_xor_neon)
+
+ .align 6
+ENTRY(chacha20_4block_xor_neon)
+ // x0: Input state matrix, s
+ // x1: 4 data blocks output, o
+ // x2: 4 data blocks input, i
+
+ //
+ // This function encrypts four consecutive ChaCha20 blocks by loading
+ // the state matrix in NEON registers four times. The algorithm performs
+ // each operation on the corresponding word of each state matrix, hence
+ // requires no word shuffling. For final XORing step we transpose the
+ // matrix by interleaving 32- and then 64-bit words, which allows us to
+ // do XOR in NEON registers.
+ //
+ adr x3, CTRINC // ... and ROT8
+ ld1 {v30.4s-v31.4s}, [x3]
+
+ // x0..15[0-3] = s0..3[0..3]
+ mov x4, x0
+ ld4r { v0.4s- v3.4s}, [x4], #16
+ ld4r { v4.4s- v7.4s}, [x4], #16
+ ld4r { v8.4s-v11.4s}, [x4], #16
+ ld4r {v12.4s-v15.4s}, [x4]
+
+ // x12 += counter values 0-3
+ add v12.4s, v12.4s, v30.4s
+
+ mov x3, #10
+
+.Ldoubleround4:
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+ add v0.4s, v0.4s, v4.4s
+ add v1.4s, v1.4s, v5.4s
+ add v2.4s, v2.4s, v6.4s
+ add v3.4s, v3.4s, v7.4s
+
+ eor v12.16b, v12.16b, v0.16b
+ eor v13.16b, v13.16b, v1.16b
+ eor v14.16b, v14.16b, v2.16b
+ eor v15.16b, v15.16b, v3.16b
+
+ rev32 v12.8h, v12.8h
+ rev32 v13.8h, v13.8h
+ rev32 v14.8h, v14.8h
+ rev32 v15.8h, v15.8h
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+ add v8.4s, v8.4s, v12.4s
+ add v9.4s, v9.4s, v13.4s
+ add v10.4s, v10.4s, v14.4s
+ add v11.4s, v11.4s, v15.4s
+
+ eor v16.16b, v4.16b, v8.16b
+ eor v17.16b, v5.16b, v9.16b
+ eor v18.16b, v6.16b, v10.16b
+ eor v19.16b, v7.16b, v11.16b
+
+ shl v4.4s, v16.4s, #12
+ shl v5.4s, v17.4s, #12
+ shl v6.4s, v18.4s, #12
+ shl v7.4s, v19.4s, #12
+
+ sri v4.4s, v16.4s, #20
+ sri v5.4s, v17.4s, #20
+ sri v6.4s, v18.4s, #20
+ sri v7.4s, v19.4s, #20
+
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ add v0.4s, v0.4s, v4.4s
+ add v1.4s, v1.4s, v5.4s
+ add v2.4s, v2.4s, v6.4s
+ add v3.4s, v3.4s, v7.4s
+
+ eor v12.16b, v12.16b, v0.16b
+ eor v13.16b, v13.16b, v1.16b
+ eor v14.16b, v14.16b, v2.16b
+ eor v15.16b, v15.16b, v3.16b
+
+ tbl v12.16b, {v12.16b}, v31.16b
+ tbl v13.16b, {v13.16b}, v31.16b
+ tbl v14.16b, {v14.16b}, v31.16b
+ tbl v15.16b, {v15.16b}, v31.16b
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+ add v8.4s, v8.4s, v12.4s
+ add v9.4s, v9.4s, v13.4s
+ add v10.4s, v10.4s, v14.4s
+ add v11.4s, v11.4s, v15.4s
+
+ eor v16.16b, v4.16b, v8.16b
+ eor v17.16b, v5.16b, v9.16b
+ eor v18.16b, v6.16b, v10.16b
+ eor v19.16b, v7.16b, v11.16b
+
+ shl v4.4s, v16.4s, #7
+ shl v5.4s, v17.4s, #7
+ shl v6.4s, v18.4s, #7
+ shl v7.4s, v19.4s, #7
+
+ sri v4.4s, v16.4s, #25
+ sri v5.4s, v17.4s, #25
+ sri v6.4s, v18.4s, #25
+ sri v7.4s, v19.4s, #25
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+ add v0.4s, v0.4s, v5.4s
+ add v1.4s, v1.4s, v6.4s
+ add v2.4s, v2.4s, v7.4s
+ add v3.4s, v3.4s, v4.4s
+
+ eor v15.16b, v15.16b, v0.16b
+ eor v12.16b, v12.16b, v1.16b
+ eor v13.16b, v13.16b, v2.16b
+ eor v14.16b, v14.16b, v3.16b
+
+ rev32 v15.8h, v15.8h
+ rev32 v12.8h, v12.8h
+ rev32 v13.8h, v13.8h
+ rev32 v14.8h, v14.8h
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+ add v10.4s, v10.4s, v15.4s
+ add v11.4s, v11.4s, v12.4s
+ add v8.4s, v8.4s, v13.4s
+ add v9.4s, v9.4s, v14.4s
+
+ eor v16.16b, v5.16b, v10.16b
+ eor v17.16b, v6.16b, v11.16b
+ eor v18.16b, v7.16b, v8.16b
+ eor v19.16b, v4.16b, v9.16b
+
+ shl v5.4s, v16.4s, #12
+ shl v6.4s, v17.4s, #12
+ shl v7.4s, v18.4s, #12
+ shl v4.4s, v19.4s, #12
+
+ sri v5.4s, v16.4s, #20
+ sri v6.4s, v17.4s, #20
+ sri v7.4s, v18.4s, #20
+ sri v4.4s, v19.4s, #20
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ add v0.4s, v0.4s, v5.4s
+ add v1.4s, v1.4s, v6.4s
+ add v2.4s, v2.4s, v7.4s
+ add v3.4s, v3.4s, v4.4s
+
+ eor v15.16b, v15.16b, v0.16b
+ eor v12.16b, v12.16b, v1.16b
+ eor v13.16b, v13.16b, v2.16b
+ eor v14.16b, v14.16b, v3.16b
+
+ tbl v15.16b, {v15.16b}, v31.16b
+ tbl v12.16b, {v12.16b}, v31.16b
+ tbl v13.16b, {v13.16b}, v31.16b
+ tbl v14.16b, {v14.16b}, v31.16b
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+ add v10.4s, v10.4s, v15.4s
+ add v11.4s, v11.4s, v12.4s
+ add v8.4s, v8.4s, v13.4s
+ add v9.4s, v9.4s, v14.4s
+
+ eor v16.16b, v5.16b, v10.16b
+ eor v17.16b, v6.16b, v11.16b
+ eor v18.16b, v7.16b, v8.16b
+ eor v19.16b, v4.16b, v9.16b
+
+ shl v5.4s, v16.4s, #7
+ shl v6.4s, v17.4s, #7
+ shl v7.4s, v18.4s, #7
+ shl v4.4s, v19.4s, #7
+
+ sri v5.4s, v16.4s, #25
+ sri v6.4s, v17.4s, #25
+ sri v7.4s, v18.4s, #25
+ sri v4.4s, v19.4s, #25
+
+ subs x3, x3, #1
+ b.ne .Ldoubleround4
+
+ ld4r {v16.4s-v19.4s}, [x0], #16
+ ld4r {v20.4s-v23.4s}, [x0], #16
+
+ // x12 += counter values 0-3
+ add v12.4s, v12.4s, v30.4s
+
+ // x0[0-3] += s0[0]
+ // x1[0-3] += s0[1]
+ // x2[0-3] += s0[2]
+ // x3[0-3] += s0[3]
+ add v0.4s, v0.4s, v16.4s
+ add v1.4s, v1.4s, v17.4s
+ add v2.4s, v2.4s, v18.4s
+ add v3.4s, v3.4s, v19.4s
+
+ ld4r {v24.4s-v27.4s}, [x0], #16
+ ld4r {v28.4s-v31.4s}, [x0]
+
+ // x4[0-3] += s1[0]
+ // x5[0-3] += s1[1]
+ // x6[0-3] += s1[2]
+ // x7[0-3] += s1[3]
+ add v4.4s, v4.4s, v20.4s
+ add v5.4s, v5.4s, v21.4s
+ add v6.4s, v6.4s, v22.4s
+ add v7.4s, v7.4s, v23.4s
+
+ // x8[0-3] += s2[0]
+ // x9[0-3] += s2[1]
+ // x10[0-3] += s2[2]
+ // x11[0-3] += s2[3]
+ add v8.4s, v8.4s, v24.4s
+ add v9.4s, v9.4s, v25.4s
+ add v10.4s, v10.4s, v26.4s
+ add v11.4s, v11.4s, v27.4s
+
+ // x12[0-3] += s3[0]
+ // x13[0-3] += s3[1]
+ // x14[0-3] += s3[2]
+ // x15[0-3] += s3[3]
+ add v12.4s, v12.4s, v28.4s
+ add v13.4s, v13.4s, v29.4s
+ add v14.4s, v14.4s, v30.4s
+ add v15.4s, v15.4s, v31.4s
+
+ // interleave 32-bit words in state n, n+1
+ zip1 v16.4s, v0.4s, v1.4s
+ zip2 v17.4s, v0.4s, v1.4s
+ zip1 v18.4s, v2.4s, v3.4s
+ zip2 v19.4s, v2.4s, v3.4s
+ zip1 v20.4s, v4.4s, v5.4s
+ zip2 v21.4s, v4.4s, v5.4s
+ zip1 v22.4s, v6.4s, v7.4s
+ zip2 v23.4s, v6.4s, v7.4s
+ zip1 v24.4s, v8.4s, v9.4s
+ zip2 v25.4s, v8.4s, v9.4s
+ zip1 v26.4s, v10.4s, v11.4s
+ zip2 v27.4s, v10.4s, v11.4s
+ zip1 v28.4s, v12.4s, v13.4s
+ zip2 v29.4s, v12.4s, v13.4s
+ zip1 v30.4s, v14.4s, v15.4s
+ zip2 v31.4s, v14.4s, v15.4s
+
+ // interleave 64-bit words in state n, n+2
+ zip1 v0.2d, v16.2d, v18.2d
+ zip2 v4.2d, v16.2d, v18.2d
+ zip1 v8.2d, v17.2d, v19.2d
+ zip2 v12.2d, v17.2d, v19.2d
+ ld1 {v16.16b-v19.16b}, [x2], #64
+
+ zip1 v1.2d, v20.2d, v22.2d
+ zip2 v5.2d, v20.2d, v22.2d
+ zip1 v9.2d, v21.2d, v23.2d
+ zip2 v13.2d, v21.2d, v23.2d
+ ld1 {v20.16b-v23.16b}, [x2], #64
+
+ zip1 v2.2d, v24.2d, v26.2d
+ zip2 v6.2d, v24.2d, v26.2d
+ zip1 v10.2d, v25.2d, v27.2d
+ zip2 v14.2d, v25.2d, v27.2d
+ ld1 {v24.16b-v27.16b}, [x2], #64
+
+ zip1 v3.2d, v28.2d, v30.2d
+ zip2 v7.2d, v28.2d, v30.2d
+ zip1 v11.2d, v29.2d, v31.2d
+ zip2 v15.2d, v29.2d, v31.2d
+ ld1 {v28.16b-v31.16b}, [x2]
+
+ // xor with corresponding input, write to output
+ eor v16.16b, v16.16b, v0.16b
+ eor v17.16b, v17.16b, v1.16b
+ eor v18.16b, v18.16b, v2.16b
+ eor v19.16b, v19.16b, v3.16b
+ eor v20.16b, v20.16b, v4.16b
+ eor v21.16b, v21.16b, v5.16b
+ st1 {v16.16b-v19.16b}, [x1], #64
+ eor v22.16b, v22.16b, v6.16b
+ eor v23.16b, v23.16b, v7.16b
+ eor v24.16b, v24.16b, v8.16b
+ eor v25.16b, v25.16b, v9.16b
+ st1 {v20.16b-v23.16b}, [x1], #64
+ eor v26.16b, v26.16b, v10.16b
+ eor v27.16b, v27.16b, v11.16b
+ eor v28.16b, v28.16b, v12.16b
+ st1 {v24.16b-v27.16b}, [x1], #64
+ eor v29.16b, v29.16b, v13.16b
+ eor v30.16b, v30.16b, v14.16b
+ eor v31.16b, v31.16b, v15.16b
+ st1 {v28.16b-v31.16b}, [x1]
+
+ ret
+ENDPROC(chacha20_4block_xor_neon)
+
+CTRINC: .word 0, 1, 2, 3
+ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
new file mode 100644
index 000000000000..a7cd575ea223
--- /dev/null
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -0,0 +1,126 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+
+static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes)
+{
+ u8 buf[CHACHA20_BLOCK_SIZE];
+
+ while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+ chacha20_4block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE * 4;
+ src += CHACHA20_BLOCK_SIZE * 4;
+ dst += CHACHA20_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA20_BLOCK_SIZE) {
+ chacha20_block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE;
+ src += CHACHA20_BLOCK_SIZE;
+ dst += CHACHA20_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha20_block_xor_neon(state, buf, buf);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+static int chacha20_neon(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+ return crypto_chacha20_crypt(req);
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ crypto_chacha20_init(state, ctx, walk.iv);
+
+ kernel_neon_begin();
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct skcipher_alg alg = {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha20_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA20_KEY_SIZE,
+ .max_keysize = CHACHA20_KEY_SIZE,
+ .ivsize = CHACHA20_IV_SIZE,
+ .chunksize = CHACHA20_BLOCK_SIZE,
+ .walksize = 4 * CHACHA20_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha20_neon,
+ .decrypt = chacha20_neon,
+};
+
+static int __init chacha20_simd_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_ASIMD))
+ return -ENODEV;
+
+ return crypto_register_skcipher(&alg);
+}
+
+static void __exit chacha20_simd_mod_fini(void)
+{
+ crypto_unregister_skcipher(&alg);
+}
+
+module_init(chacha20_simd_mod_init);
+module_exit(chacha20_simd_mod_fini);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
deleted file mode 100644
index 6a37c3c6b11d..000000000000
--- a/arch/arm64/crypto/crc32-arm64.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
- *
- * Module based on crypto/crc32c_generic.c
- *
- * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
- * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
- *
- * Using inline assembly instead of intrinsics in order to be backwards
- * compatible with older compilers.
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/unaligned/access_ok.h>
-#include <linux/cpufeature.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
-MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
-MODULE_LICENSE("GPL v2");
-
-#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-
-static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
-{
- s64 length = len;
-
- while ((length -= sizeof(u64)) >= 0) {
- CRC32X(crc, get_unaligned_le64(p));
- p += sizeof(u64);
- }
-
- /* The following is more efficient than the straight loop */
- if (length & sizeof(u32)) {
- CRC32W(crc, get_unaligned_le32(p));
- p += sizeof(u32);
- }
- if (length & sizeof(u16)) {
- CRC32H(crc, get_unaligned_le16(p));
- p += sizeof(u16);
- }
- if (length & sizeof(u8))
- CRC32B(crc, *p);
-
- return crc;
-}
-
-static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
-{
- s64 length = len;
-
- while ((length -= sizeof(u64)) >= 0) {
- CRC32CX(crc, get_unaligned_le64(p));
- p += sizeof(u64);
- }
-
- /* The following is more efficient than the straight loop */
- if (length & sizeof(u32)) {
- CRC32CW(crc, get_unaligned_le32(p));
- p += sizeof(u32);
- }
- if (length & sizeof(u16)) {
- CRC32CH(crc, get_unaligned_le16(p));
- p += sizeof(u16);
- }
- if (length & sizeof(u8))
- CRC32CB(crc, *p);
-
- return crc;
-}
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-struct chksum_ctx {
- u32 key;
-};
-
-struct chksum_desc_ctx {
- u32 crc;
-};
-
-static int chksum_init(struct shash_desc *desc)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
-
- return 0;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (keylen != sizeof(mctx->key)) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- mctx->key = get_unaligned_le32(key);
- return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksumc_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(ctx->crc, out);
- return 0;
-}
-
-static int chksumc_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(~ctx->crc, out);
- return 0;
-}
-
-static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
- return 0;
-}
-
-static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
- return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(ctx->crc, data, len, out);
-}
-
-static int chksumc_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksumc_finup(ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
- unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksum_finup(mctx->key, data, length, out);
-}
-
-static int chksumc_digest(struct shash_desc *desc, const u8 *data,
- unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksumc_finup(mctx->key, data, length, out);
-}
-
-static int crc32_cra_init(struct crypto_tfm *tfm)
-{
- struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = 0;
- return 0;
-}
-
-static int crc32c_cra_init(struct crypto_tfm *tfm)
-{
- struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static struct shash_alg crc32_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksum_update,
- .final = chksum_final,
- .finup = chksum_finup,
- .digest = chksum_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-arm64-hw",
- .cra_priority = 300,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_alignmask = 0,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_cra_init,
- }
-};
-
-static struct shash_alg crc32c_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksumc_update,
- .final = chksumc_final,
- .finup = chksumc_finup,
- .digest = chksumc_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-arm64-hw",
- .cra_priority = 300,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_alignmask = 0,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32c_cra_init,
- }
-};
-
-static int __init crc32_mod_init(void)
-{
- int err;
-
- err = crypto_register_shash(&crc32_alg);
-
- if (err)
- return err;
-
- err = crypto_register_shash(&crc32c_alg);
-
- if (err) {
- crypto_unregister_shash(&crc32_alg);
- return err;
- }
-
- return 0;
-}
-
-static void __exit crc32_mod_exit(void)
-{
- crypto_unregister_shash(&crc32_alg);
- crypto_unregister_shash(&crc32c_alg);
-}
-
-module_cpu_feature_match(CRC32, crc32_mod_init);
-module_exit(crc32_mod_exit);
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index 8594127d5e01..eccb1ae90064 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
return 0;
}
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ u32 *crc = shash_desc_ctx(desc);
+
+ *crc = crc32_armv8_le(*crc, data, length);
+ return 0;
+}
+
+static int crc32c_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ u32 *crc = shash_desc_ctx(desc);
+
+ *crc = crc32c_armv8_le(*crc, data, length);
+ return 0;
+}
+
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
@@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
static struct shash_alg crc32_pmull_algs[] = { {
.setkey = crc32_pmull_setkey,
.init = crc32_pmull_init,
- .update = crc32_pmull_update,
+ .update = crc32_update,
.final = crc32_pmull_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
@@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
}, {
.setkey = crc32_pmull_setkey,
.init = crc32_pmull_init,
- .update = crc32c_pmull_update,
+ .update = crc32c_update,
.final = crc32c_pmull_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
@@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
static int __init crc32_pmull_mod_init(void)
{
- if (elf_hwcap & HWCAP_CRC32) {
- fallback_crc32 = crc32_armv8_le;
- fallback_crc32c = crc32c_armv8_le;
- } else {
- fallback_crc32 = crc32_le;
- fallback_crc32c = __crc32c_le;
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
+ crc32_pmull_algs[0].update = crc32_pmull_update;
+ crc32_pmull_algs[1].update = crc32c_pmull_update;
+
+ if (elf_hwcap & HWCAP_CRC32) {
+ fallback_crc32 = crc32_armv8_le;
+ fallback_crc32c = crc32c_armv8_le;
+ } else {
+ fallback_crc32 = crc32_le;
+ fallback_crc32c = __crc32c_le;
+ }
+ } else if (!(elf_hwcap & HWCAP_CRC32)) {
+ return -ENODEV;
}
-
return crypto_register_shashes(crc32_pmull_algs,
ARRAY_SIZE(crc32_pmull_algs));
}
@@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
ARRAY_SIZE(crc32_pmull_algs));
}
-module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
+static const struct cpu_feature crc32_cpu_feature[] = {
+ { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
+
+module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 8365a84c2640..a12f1afc95a3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index eaa5bbe3fa87..b4b34004a21e 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -29,41 +29,29 @@
#include <clocksource/arm_arch_timer.h>
-#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585)
+#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
extern struct static_key_false arch_timer_read_ool_enabled;
-#define needs_fsl_a008585_workaround() \
+#define needs_unstable_timer_counter_workaround() \
static_branch_unlikely(&arch_timer_read_ool_enabled)
#else
-#define needs_fsl_a008585_workaround() false
+#define needs_unstable_timer_counter_workaround() false
#endif
-u32 __fsl_a008585_read_cntp_tval_el0(void);
-u32 __fsl_a008585_read_cntv_tval_el0(void);
-u64 __fsl_a008585_read_cntvct_el0(void);
-/*
- * The number of retries is an arbitrary value well beyond the highest number
- * of iterations the loop has been observed to take.
- */
-#define __fsl_a008585_read_reg(reg) ({ \
- u64 _old, _new; \
- int _retries = 200; \
- \
- do { \
- _old = read_sysreg(reg); \
- _new = read_sysreg(reg); \
- _retries--; \
- } while (unlikely(_old != _new) && _retries); \
- \
- WARN_ON_ONCE(!_retries); \
- _new; \
-})
+struct arch_timer_erratum_workaround {
+ const char *id; /* Indicate the Erratum ID */
+ u32 (*read_cntp_tval_el0)(void);
+ u32 (*read_cntv_tval_el0)(void);
+ u64 (*read_cntvct_el0)(void);
+};
+
+extern const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround;
#define arch_timer_reg_read_stable(reg) \
({ \
u64 _val; \
- if (needs_fsl_a008585_workaround()) \
- _val = __fsl_a008585_read_##reg(); \
+ if (needs_unstable_timer_counter_workaround()) \
+ _val = timer_unstable_counter_workaround->read_##reg();\
else \
_val = read_sysreg(reg); \
_val; \
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 446f6c46d4b1..1b67c3782d00 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,6 +25,7 @@
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
+#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
@@ -164,22 +165,25 @@ lr .req x30 // link register
/*
* Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
- * <symbol> is within the range +/- 4 GB of the PC.
+ * <symbol> is within the range +/- 4 GB of the PC when running
+ * in core kernel context. In module context, a movz/movk sequence
+ * is used, since modules may be loaded far away from the kernel
+ * when KASLR is in effect.
*/
/*
* @dst: destination register (64 bit wide)
* @sym: name of the symbol
- * @tmp: optional scratch register to be used if <dst> == sp, which
- * is not allowed in an adrp instruction
*/
- .macro adr_l, dst, sym, tmp=
- .ifb \tmp
+ .macro adr_l, dst, sym
+#ifndef MODULE
adrp \dst, \sym
add \dst, \dst, :lo12:\sym
- .else
- adrp \tmp, \sym
- add \dst, \tmp, :lo12:\sym
- .endif
+#else
+ movz \dst, #:abs_g3:\sym
+ movk \dst, #:abs_g2_nc:\sym
+ movk \dst, #:abs_g1_nc:\sym
+ movk \dst, #:abs_g0_nc:\sym
+#endif
.endm
/*
@@ -190,6 +194,7 @@ lr .req x30 // link register
* the address
*/
.macro ldr_l, dst, sym, tmp=
+#ifndef MODULE
.ifb \tmp
adrp \dst, \sym
ldr \dst, [\dst, :lo12:\sym]
@@ -197,6 +202,15 @@ lr .req x30 // link register
adrp \tmp, \sym
ldr \dst, [\tmp, :lo12:\sym]
.endif
+#else
+ .ifb \tmp
+ adr_l \dst, \sym
+ ldr \dst, [\dst]
+ .else
+ adr_l \tmp, \sym
+ ldr \dst, [\tmp]
+ .endif
+#endif
.endm
/*
@@ -206,8 +220,13 @@ lr .req x30 // link register
* while <src> needs to be preserved.
*/
.macro str_l, src, sym, tmp
+#ifndef MODULE
adrp \tmp, \sym
str \src, [\tmp, :lo12:\sym]
+#else
+ adr_l \tmp, \sym
+ str \src, [\tmp]
+#endif
.endm
/*
@@ -423,6 +442,28 @@ alternative_endif
.endm
/*
+ * Errata workaround prior to TTBR0_EL1 update
+ *
+ * val: TTBR value with new BADDR, preserved
+ * tmp0: temporary register, clobbered
+ * tmp1: other temporary register, clobbered
+ */
+ .macro pre_ttbr0_update_workaround, val, tmp0, tmp1
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
+ mrs \tmp0, ttbr0_el1
+ mov \tmp1, #FALKOR_RESERVED_ASID
+ bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR
+ msr ttbr0_el1, \tmp0
+ isb
+ bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR
+ msr ttbr0_el1, \tmp0
+ isb
+alternative_else_nop_endif
+#endif
+ .endm
+
+/*
* Errata workaround post TTBR0_EL1 update.
*/
.macro post_ttbr0_update_workaround
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 4174f09678c4..fb78a5d3b60b 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -35,7 +35,9 @@
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_HAS_NO_FPSIMD 16
+#define ARM64_WORKAROUND_REPEAT_TLBI 17
+#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
-#define ARM64_NCAPS 17
+#define ARM64_NCAPS 19
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b4989df48670..4ce82ed3e7c3 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -29,7 +29,20 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
-/* CPU feature register tracking */
+/*
+ * CPU feature register tracking
+ *
+ * The safe value of a CPUID feature field is dependent on the implications
+ * of the values assigned to it by the architecture. Based on the relationship
+ * between the values, the features are classified into 3 types - LOWER_SAFE,
+ * HIGHER_SAFE and EXACT.
+ *
+ * The lowest value of all the CPUs is chosen for LOWER_SAFE and highest
+ * for HIGHER_SAFE. It is expected that all CPUs have the same value for
+ * a field when EXACT is specified, failing which, the safe value specified
+ * in the table is chosen.
+ */
+
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
@@ -42,8 +55,12 @@ enum ftr_type {
#define FTR_SIGNED true /* Value should be treated as signed */
#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+#define FTR_VISIBLE true /* Feature visible to the user space */
+#define FTR_HIDDEN false /* Feature is hidden from the user */
+
struct arm64_ftr_bits {
bool sign; /* Value is signed ? */
+ bool visible;
bool strict; /* CPU Sanity check: strict matching required ? */
enum ftr_type type;
u8 shift;
@@ -59,7 +76,9 @@ struct arm64_ftr_bits {
struct arm64_ftr_reg {
const char *name;
u64 strict_mask;
+ u64 user_mask;
u64 sys_val;
+ u64 user_val;
const struct arm64_ftr_bits *ftr_bits;
};
@@ -159,6 +178,11 @@ static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
+static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
+{
+ return (reg->user_val | (reg->sys_val & reg->user_mask));
+}
+
static inline int __attribute_const__
cpuid_feature_extract_field(u64 features, int field, bool sign)
{
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 26a68ddb11c1..fc502713ab37 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -56,6 +56,9 @@
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
+#define MIDR_CPU_VAR_REV(var, rev) \
+ (((var) << MIDR_VARIANT_SHIFT) | (rev))
+
#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
@@ -71,6 +74,7 @@
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
+#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -84,10 +88,13 @@
#define BRCM_CPU_PART_VULCAN 0x516
+#define QCOM_CPU_PART_FALKOR_V1 0x800
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 0b6b1633017f..e7445281e534 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -50,6 +50,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
#define __efi_call_early(f, ...) f(__VA_ARGS__)
+#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__)
#define efi_is_64bit() (true)
#define efi_call_proto(protocol, f, instance, ...) \
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index bc853663dd51..aecc07e09a18 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -332,6 +332,8 @@ bool aarch64_insn_is_branch(u32 insn);
u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+ u32 insn);
u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2a2752b5b6aa..6e99978e83bd 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -188,6 +188,9 @@
#define CPTR_EL2_DEFAULT 0x000033ff
/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TPMS (1 << 14)
+#define MDCR_EL2_E2PB_MASK (UL(0x3))
+#define MDCR_EL2_E2PB_SHIFT (UL(12))
#define MDCR_EL2_TDRA (1 << 11)
#define MDCR_EL2_TDOSA (1 << 10)
#define MDCR_EL2_TDA (1 << 9)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e5050388e062..f21fd3894370 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -70,9 +70,6 @@ struct kvm_arch {
/* Interrupt controller */
struct vgic_dist vgic;
-
- /* Timer */
- struct arch_timer_kvm timer;
};
#define KVM_NR_MEM_OBJS 40
@@ -229,7 +226,12 @@ struct kvm_vcpu_arch {
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
- struct kvm_guest_debug_arch host_debug_state;
+ struct {
+ /* {Break,watch}point registers */
+ struct kvm_guest_debug_arch regs;
+ /* Statistical profiling extension */
+ u64 pmscr_el1;
+ } host_debug_state;
/* VGIC state */
struct vgic_cpu vgic_cpu;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6f72fe8b0e3e..ed1246014901 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -47,7 +47,7 @@
* If the page is in the bottom half, we have to use the top half. If
* the page is in the top half, we have to use the bottom half:
*
- * T = __virt_to_phys(__hyp_idmap_text_start)
+ * T = __pa_symbol(__hyp_idmap_text_start)
* if (T & BIT(VA_BITS - 1))
* HYP_VA_MIN = 0 //idmap in upper half
* else
@@ -236,13 +236,11 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
kvm_pfn_t pfn,
- unsigned long size,
- bool ipa_uncached)
+ unsigned long size)
{
void *va = page_address(pfn_to_page(pfn));
- if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
- kvm_flush_dcache_to_poc(va, size);
+ kvm_flush_dcache_to_poc(va, size);
if (!icache_is_aliasing()) { /* PIPT */
flush_icache_range((unsigned long)va,
@@ -271,7 +269,7 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
}
-#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
+#define kvm_virt_to_phys(x) __pa_symbol(x)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index fc756e22c84c..606b20910a5c 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -19,7 +19,7 @@
__asm__(".arch_extension lse");
/* Move the ll/sc atomics out-of-line */
-#define __LL_SC_INLINE
+#define __LL_SC_INLINE notrace
#define __LL_SC_PREFIX(x) __ll_sc_##x
#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x))
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index bfe632808d77..32f82723338a 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -102,25 +102,6 @@
#endif
/*
- * Physical vs virtual RAM address space conversion. These are
- * private definitions which should NOT be used outside memory.h
- * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#define __virt_to_phys(x) ({ \
- phys_addr_t __x = (phys_addr_t)(x); \
- __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \
- (__x - kimage_voffset); })
-
-#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
-#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
-
-/*
- * Convert a page to/from a physical address
- */
-#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page)))
-#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
-
-/*
* Memory types available.
*/
#define MT_DEVICE_nGnRnE 0
@@ -187,6 +168,48 @@ static inline unsigned long kaslr_offset(void)
#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT)
/*
+ * Physical vs virtual RAM address space conversion. These are
+ * private definitions which should NOT be used outside memory.h
+ * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+
+
+/*
+ * The linear kernel range starts in the middle of the virtual adddress
+ * space. Testing the top bit for the start of the region is a
+ * sufficient check.
+ */
+#define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1)))
+
+#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
+#define __kimg_to_phys(addr) ((addr) - kimage_voffset)
+
+#define __virt_to_phys_nodebug(x) ({ \
+ phys_addr_t __x = (phys_addr_t)(x); \
+ __is_lm_address(__x) ? __lm_to_phys(__x) : \
+ __kimg_to_phys(__x); \
+})
+
+#define __pa_symbol_nodebug(x) __kimg_to_phys((phys_addr_t)(x))
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x) __virt_to_phys_nodebug(x)
+#define __phys_addr_symbol(x) __pa_symbol_nodebug(x)
+#endif
+
+#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
+
+/*
+ * Convert a page to/from a physical address
+ */
+#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page)))
+#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
+
+/*
* Note: Drivers should NOT use these. They are the wrong
* translation for translating DMA addresses. Use the driver
* DMA support - see dma-mapping.h.
@@ -207,9 +230,12 @@ static inline void *phys_to_virt(phys_addr_t x)
* Drivers should NOT use these either.
*/
#define __pa(x) __virt_to_phys((unsigned long)(x))
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x))
+#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
* virt_to_page(k) convert a _valid_ virtual address to struct page *
@@ -222,7 +248,7 @@ static inline void *phys_to_virt(phys_addr_t x)
#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#else
#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
-#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
+#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0363fe80455c..1ef40d82cfd3 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -19,6 +19,10 @@
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H
+#define FALKOR_RESERVED_ASID 1
+
+#ifndef __ASSEMBLY__
+
#include <linux/compiler.h>
#include <linux/sched.h>
@@ -45,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
*/
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = virt_to_phys(empty_zero_page);
+ unsigned long ttbr = __pa_symbol(empty_zero_page);
write_sysreg(ttbr, ttbr0_el1);
isb();
@@ -114,7 +118,7 @@ static inline void cpu_install_idmap(void)
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(idmap_pg_dir, &init_mm);
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@@ -129,7 +133,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgd)
phys_addr_t pgd_phys = virt_to_phys(pgd);
- replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+ replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
cpu_install_idmap();
replace_phys(pgd_phys);
@@ -220,4 +224,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
void verify_cpu_asid_bits(void);
-#endif
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !__ASM_MMU_CONTEXT_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ffbb9a520563..0eef6064bf3b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -52,7 +52,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) pfn_to_page(PHYS_PFN(__pa(empty_zero_page)))
+#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
@@ -71,9 +71,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
-#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
+#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
-#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -84,8 +83,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_global(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID)
+/*
+ * Execute-only user mappings do not have the PTE_USER bit set. All valid
+ * kernel mappings have the PTE_UXN bit set.
+ */
+#define pte_valid_not_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
#define pte_valid_young(pte) \
((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
@@ -178,7 +181,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
* Only if the new pte is valid and kernel, otherwise TLB maintenance
* or update_mmu_cache() have the necessary barriers.
*/
- if (pte_valid_global(pte)) {
+ if (pte_valid_not_user(pte)) {
dsb(ishst);
isb();
}
@@ -212,7 +215,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
- if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte))
+ if (pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
}
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 747c65a616ed..c97b8bd2acba 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -187,7 +187,6 @@ static inline void spin_lock_prefetch(const void *ptr)
#endif
int cpu_enable_pan(void *__unused);
-int cpu_enable_uao(void *__unused);
int cpu_enable_cache_maint_trap(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 513daf050e84..11403fdd0a50 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -194,6 +194,26 @@ static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset)
return val;
}
+/*
+ * Read a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline unsigned long pt_regs_read_reg(const struct pt_regs *regs, int r)
+{
+ return (r == 31) ? 0 : regs->regs[r];
+}
+
+/*
+ * Write a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
+ unsigned long val)
+{
+ if (r != 31)
+ regs->regs[r] = val;
+}
+
/* Valid only for Kernel mode traps. */
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 98ae03f8eedd..ac24b6e798b1 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -32,8 +32,27 @@
* [11-8] : CRm
* [7-5] : Op2
*/
+#define Op0_shift 19
+#define Op0_mask 0x3
+#define Op1_shift 16
+#define Op1_mask 0x7
+#define CRn_shift 12
+#define CRn_mask 0xf
+#define CRm_shift 8
+#define CRm_mask 0xf
+#define Op2_shift 5
+#define Op2_mask 0x7
+
#define sys_reg(op0, op1, crn, crm, op2) \
- ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+ (((op0) << Op0_shift) | ((op1) << Op1_shift) | \
+ ((crn) << CRn_shift) | ((crm) << CRm_shift) | \
+ ((op2) << Op2_shift))
+
+#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask)
+#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask)
+#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask)
+#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask)
+#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask)
#ifndef CONFIG_BROKEN_GAS_INST
@@ -190,6 +209,7 @@
#define ID_AA64MMFR2_CNP_SHIFT 0
/* id_aa64dfr0 */
+#define ID_AA64DFR0_PMSVER_SHIFT 32
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
#define ID_AA64DFR0_BRPS_SHIFT 12
@@ -245,6 +265,10 @@
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
#endif
+
+/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
+#define SYS_MPIDR_SAFE_VAL (1UL << 31)
+
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index deab52374119..af1c76981911 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -36,9 +36,21 @@
* not. The macros handles invoking the asm with or without the
* register argument as appropriate.
*/
-#define __TLBI_0(op, arg) asm ("tlbi " #op)
-#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg))
-#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
+#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \
+ ALTERNATIVE("nop\n nop", \
+ "dsb ish\n tlbi " #op, \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ : : )
+
+#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
+ ALTERNATIVE("nop\n nop", \
+ "dsb ish\n tlbi " #op ", %0", \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ : : "r" (arg))
+
+#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 46da3ea638bb..5308d696311b 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -379,9 +379,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u
{
unsigned long res = n;
kasan_check_write(to, n);
+ check_object_size(to, n, false);
if (access_ok(VERIFY_READ, from, n)) {
- check_object_size(to, n, false);
res = __arch_copy_from_user(to, from, n);
}
if (unlikely(res))
@@ -392,9 +392,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u
static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
{
kasan_check_read(from, n);
+ check_object_size(from, n, true);
if (access_ok(VERIFY_WRITE, to, n)) {
- check_object_size(from, n, true);
n = __arch_copy_to_user(to, from, n);
}
return n;
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index fea10736b11f..439f6b5d31f6 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -47,6 +47,7 @@
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/sysreg.h>
+#include <asm/cpufeature.h>
/*
* __boot_cpu_mode records what mode CPUs were booted in.
@@ -80,6 +81,14 @@ static inline bool is_kernel_in_hyp_mode(void)
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
+static inline bool has_vhe(void)
+{
+ if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ return true;
+
+ return false;
+}
+
#ifdef CONFIG_ARM64_VHE
extern void verify_cpu_run_el(void);
#else
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a739287ef6a3..61c263cba272 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -30,5 +30,7 @@
#define HWCAP_ATOMICS (1 << 8)
#define HWCAP_FPHP (1 << 9)
#define HWCAP_ASIMDHP (1 << 10)
+#define HWCAP_CPUID (1 << 11)
+#define HWCAP_ASIMDRDM (1 << 12)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3051f86a9b5f..c2860358ae3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* Device Control API on vcpu fd */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index b5c3933ed441..d1ff83dfe5de 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -77,6 +77,7 @@ struct user_fpsimd_state {
__uint128_t vregs[32];
__u32 fpsr;
__u32 fpcr;
+ __u32 __reserved[2];
};
struct user_hwdebug_state {
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d66bbaafc0c..1606c6b2a280 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,3 +55,7 @@ obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
head-y := head.o
extra-y += $(head-y) vmlinux.lds
+
+ifeq ($(CONFIG_DEBUG_EFI),y)
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+endif
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index a32b4011d711..1f5655cd9cc9 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/acpi.h>
+#include <linux/mm.h>
#include <linux/types.h>
#include <asm/cpu_ops.h>
@@ -109,7 +110,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
* that read this address need to convert this address to the
* Boot-Loader's endianness before jumping.
*/
- writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+ writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
arch_send_wakeup_ipi_mask(cpumask_of(cpu));
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 78f368039c79..e9c4dc9e0ada 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -73,5 +73,5 @@ NOKPROBE_SYMBOL(_mcount);
#endif
/* arm-smccc */
-EXPORT_SYMBOL(arm_smccc_smc);
-EXPORT_SYMBOL(arm_smccc_hvc);
+EXPORT_SYMBOL(__arm_smccc_smc);
+EXPORT_SYMBOL(__arm_smccc_hvc);
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index ecf9298a12d4..86032a012388 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -636,7 +636,7 @@ static int __init armv8_deprecated_init(void)
if(system_supports_mixed_endian_el0())
register_insn_emulation(&setend_ops);
else
- pr_info("setend instruction emulation is not supported on the system");
+ pr_info("setend instruction emulation is not supported on this system\n");
}
cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index bc049afc73a7..b3bb7ef97bc8 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -143,8 +143,11 @@ int main(void)
DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
- DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
- DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
+ DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
+ DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
+ DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
+ DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
+
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 9617301f76b5..3f2250fc391b 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -84,7 +84,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
static int __init_cache_level(unsigned int cpu)
{
- unsigned int ctype, level, leaves;
+ unsigned int ctype, level, leaves, of_level;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -97,6 +97,17 @@ static int __init_cache_level(unsigned int cpu)
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
}
+ of_level = of_find_last_cache_level(cpu);
+ if (level < of_level) {
+ /*
+ * some external caches not specified in CLIDR_EL1
+ * the information may be available in the device tree
+ * only unified external caches are considered here
+ */
+ leaves += (of_level - level);
+ level = of_level;
+ }
+
this_cpu_ci->num_levels = level;
this_cpu_ci->num_leaves = leaves;
return 0;
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
index d4e9ecb264f0..6c2b1b4f57c9 100644
--- a/arch/arm64/kernel/cpu-reset.h
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -24,7 +24,7 @@ static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
is_hyp_mode_available();
- restart = (void *)virt_to_phys(__cpu_soft_restart);
+ restart = (void *)__pa_symbol(__cpu_soft_restart);
cpu_install_idmap();
restart(el2_switch, entry, arg0, arg1, arg2);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b75e917aac46..f6cc67e7626e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -79,8 +79,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
/* Cortex-A57 r0p0 - r1p2 */
.desc = "ARM erratum 832075",
.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
- MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
- (1 << MIDR_VARIANT_SHIFT) | 2),
+ MIDR_RANGE(MIDR_CORTEX_A57,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(1, 2)),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_834220
@@ -88,8 +89,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
/* Cortex-A57 r0p0 - r1p2 */
.desc = "ARM erratum 834220",
.capability = ARM64_WORKAROUND_834220,
- MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
- (1 << MIDR_VARIANT_SHIFT) | 2),
+ MIDR_RANGE(MIDR_CORTEX_A57,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(1, 2)),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_845719
@@ -113,8 +115,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
.desc = "Cavium erratum 27456",
.capability = ARM64_WORKAROUND_CAVIUM_27456,
- MIDR_RANGE(MIDR_THUNDERX, 0x00,
- (1 << MIDR_VARIANT_SHIFT) | 1),
+ MIDR_RANGE(MIDR_THUNDERX,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(1, 1)),
},
{
/* Cavium ThunderX, T81 pass 1.0 */
@@ -130,6 +133,24 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.def_scope = SCOPE_LOCAL_CPU,
.enable = cpu_enable_trap_ctr_access,
},
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+ {
+ .desc = "Qualcomm Technologies Falkor erratum 1003",
+ .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+ MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(0, 0)),
+ },
+#endif
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+ {
+ .desc = "Qualcomm Technologies Falkor erratum 1009",
+ .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+ MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(0, 0)),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fdf8f045929f..abda8e861865 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -23,12 +23,14 @@
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
+#include <linux/mm.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
+#include <asm/traps.h>
#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
@@ -52,9 +54,10 @@ EXPORT_SYMBOL(cpu_hwcaps);
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
-#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
.sign = SIGNED, \
+ .visible = VISIBLE, \
.strict = STRICT, \
.type = TYPE, \
.shift = SHIFT, \
@@ -63,12 +66,12 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
}
/* Define a feature with unsigned values */
-#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
- __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
/* Define a feature with a signed value */
-#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
- __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
#define ARM64_FTR_END \
{ \
@@ -80,85 +83,80 @@ static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
+/*
+ * NOTE: Any changes to the visibility of features should be kept in
+ * sync with the documentation of the CPU feature register ABI.
+ */
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
/* Linux doesn't care about the EL3 */
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
/*
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - AIVIVT.
*/
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
@@ -168,79 +166,78 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ /*
+ * We can instantiate multiple PMU instances with different levels
+ * of support.
+ */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_mvfr2[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_dczid[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_isar5[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
- S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
ARM64_FTR_END,
};
@@ -251,29 +248,24 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = {
* id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
*/
static const struct arm64_ftr_bits ftr_generic_32bits[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
ARM64_FTR_END,
};
-static const struct arm64_ftr_bits ftr_generic[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+/* Table for a single 32bit feature value */
+static const struct arm64_ftr_bits ftr_single32[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 32, 0),
ARM64_FTR_END,
};
-static const struct arm64_ftr_bits ftr_generic32[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
- ARM64_FTR_END,
-};
-
-static const struct arm64_ftr_bits ftr_aa64raz[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+static const struct arm64_ftr_bits ftr_raz[] = {
ARM64_FTR_END,
};
@@ -314,15 +306,15 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
- ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+ ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
- ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+ ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
- ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+ ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -334,7 +326,7 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
/* Op1 = 3, CRn = 14, CRm = 0 */
- ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+ ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_single32),
};
static int search_cmp_ftr_reg(const void *id, const void *regp)
@@ -410,25 +402,43 @@ static void __init sort_ftr_regs(void)
/*
* Initialise the CPU feature register from Boot CPU values.
* Also initiliases the strict_mask for the register.
+ * Any bits that are not covered by an arm64_ftr_bits entry are considered
+ * RES0 for the system-wide value, and must strictly match.
*/
static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
{
u64 val = 0;
u64 strict_mask = ~0x0ULL;
+ u64 user_mask = 0;
+ u64 valid_mask = 0;
+
const struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
BUG_ON(!reg);
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
val = arm64_ftr_set_value(ftrp, val, ftr_new);
+
+ valid_mask |= ftr_mask;
if (!ftrp->strict)
- strict_mask &= ~arm64_ftr_mask(ftrp);
+ strict_mask &= ~ftr_mask;
+ if (ftrp->visible)
+ user_mask |= ftr_mask;
+ else
+ reg->user_val = arm64_ftr_set_value(ftrp,
+ reg->user_val,
+ ftrp->safe_val);
}
+
+ val &= valid_mask;
+
reg->sys_val = val;
reg->strict_mask = strict_mask;
+ reg->user_mask = user_mask;
}
void __init init_cpu_features(struct cpuinfo_arm64 *info)
@@ -635,6 +645,9 @@ u64 read_system_reg(u32 id)
return regp->sys_val;
}
+#define read_sysreg_case(r) \
+ case r: return read_sysreg_s(r)
+
/*
* __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
* Read the system register on the current CPU
@@ -642,36 +655,37 @@ u64 read_system_reg(u32 id)
static u64 __raw_read_system_reg(u32 sys_id)
{
switch (sys_id) {
- case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1);
- case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1);
- case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1);
- case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1);
- case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1);
- case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1);
- case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1);
- case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1);
- case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1);
- case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1);
- case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1);
- case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1);
- case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1);
- case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1);
- case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1);
- case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1);
-
- case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1);
- case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1);
- case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1);
- case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1);
- case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1);
-
- case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0);
- case SYS_CTR_EL0: return read_cpuid(CTR_EL0);
- case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0);
+ read_sysreg_case(SYS_ID_PFR0_EL1);
+ read_sysreg_case(SYS_ID_PFR1_EL1);
+ read_sysreg_case(SYS_ID_DFR0_EL1);
+ read_sysreg_case(SYS_ID_MMFR0_EL1);
+ read_sysreg_case(SYS_ID_MMFR1_EL1);
+ read_sysreg_case(SYS_ID_MMFR2_EL1);
+ read_sysreg_case(SYS_ID_MMFR3_EL1);
+ read_sysreg_case(SYS_ID_ISAR0_EL1);
+ read_sysreg_case(SYS_ID_ISAR1_EL1);
+ read_sysreg_case(SYS_ID_ISAR2_EL1);
+ read_sysreg_case(SYS_ID_ISAR3_EL1);
+ read_sysreg_case(SYS_ID_ISAR4_EL1);
+ read_sysreg_case(SYS_ID_ISAR5_EL1);
+ read_sysreg_case(SYS_MVFR0_EL1);
+ read_sysreg_case(SYS_MVFR1_EL1);
+ read_sysreg_case(SYS_MVFR2_EL1);
+
+ read_sysreg_case(SYS_ID_AA64PFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64PFR1_EL1);
+ read_sysreg_case(SYS_ID_AA64DFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64DFR1_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
+
+ read_sysreg_case(SYS_CNTFRQ_EL0);
+ read_sysreg_case(SYS_CTR_EL0);
+ read_sysreg_case(SYS_DCZID_EL0);
+
default:
BUG();
return 0;
@@ -720,13 +734,11 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
{
u32 midr = read_cpuid_id();
- u32 rv_min, rv_max;
/* Cavium ThunderX pass 1.x and 2.x */
- rv_min = 0;
- rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
-
- return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+ return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX,
+ MIDR_CPU_VAR_REV(0, 0),
+ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
}
static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
@@ -737,7 +749,7 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused
static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
int __unused)
{
- phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start);
+ phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
/*
* Activate the lower HYP offset only if:
@@ -806,7 +818,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.field_pos = ID_AA64MMFR2_UAO_SHIFT,
.min_field_value = 1,
- .enable = cpu_enable_uao,
+ /*
+ * We rely on stop_machine() calling uao_thread_switch() to set
+ * UAO immediately after patching.
+ */
},
#endif /* CONFIG_ARM64_UAO */
#ifdef CONFIG_ARM64_PAN
@@ -868,6 +883,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -933,6 +949,8 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
{
+ /* We support emulation of accesses to CPU ID feature registers */
+ elf_hwcap |= HWCAP_CPUID;
for (; hwcaps->matches; hwcaps++)
if (hwcaps->matches(hwcaps, hwcaps->def_scope))
cap_set_elf_hwcap(hwcaps);
@@ -1120,3 +1138,101 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
}
+
+/*
+ * We emulate only the following system register space.
+ * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7]
+ * See Table C5-6 System instruction encodings for System register accesses,
+ * ARMv8 ARM(ARM DDI 0487A.f) for more details.
+ */
+static inline bool __attribute_const__ is_emulated(u32 id)
+{
+ return (sys_reg_Op0(id) == 0x3 &&
+ sys_reg_CRn(id) == 0x0 &&
+ sys_reg_Op1(id) == 0x0 &&
+ (sys_reg_CRm(id) == 0 ||
+ ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7))));
+}
+
+/*
+ * With CRm == 0, reg should be one of :
+ * MIDR_EL1, MPIDR_EL1 or REVIDR_EL1.
+ */
+static inline int emulate_id_reg(u32 id, u64 *valp)
+{
+ switch (id) {
+ case SYS_MIDR_EL1:
+ *valp = read_cpuid_id();
+ break;
+ case SYS_MPIDR_EL1:
+ *valp = SYS_MPIDR_SAFE_VAL;
+ break;
+ case SYS_REVIDR_EL1:
+ /* IMPLEMENTATION DEFINED values are emulated with 0 */
+ *valp = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int emulate_sys_reg(u32 id, u64 *valp)
+{
+ struct arm64_ftr_reg *regp;
+
+ if (!is_emulated(id))
+ return -EINVAL;
+
+ if (sys_reg_CRm(id) == 0)
+ return emulate_id_reg(id, valp);
+
+ regp = get_arm64_ftr_reg(id);
+ if (regp)
+ *valp = arm64_ftr_reg_user_value(regp);
+ else
+ /*
+ * The untracked registers are either IMPLEMENTATION DEFINED
+ * (e.g, ID_AFR0_EL1) or reserved RAZ.
+ */
+ *valp = 0;
+ return 0;
+}
+
+static int emulate_mrs(struct pt_regs *regs, u32 insn)
+{
+ int rc;
+ u32 sys_reg, dst;
+ u64 val;
+
+ /*
+ * sys_reg values are defined as used in mrs/msr instruction.
+ * shift the imm value to get the encoding.
+ */
+ sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5;
+ rc = emulate_sys_reg(sys_reg, &val);
+ if (!rc) {
+ dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
+ pt_regs_write_reg(regs, dst, val);
+ regs->pc += 4;
+ }
+
+ return rc;
+}
+
+static struct undef_hook mrs_hook = {
+ .instr_mask = 0xfff00000,
+ .instr_val = 0xd5300000,
+ .pstate_mask = COMPAT_PSR_MODE_MASK,
+ .pstate_val = PSR_MODE_EL0t,
+ .fn = emulate_mrs,
+};
+
+static int __init enable_mrs_emulation(void)
+{
+ register_undef_hook(&mrs_hook);
+ return 0;
+}
+
+late_initcall(enable_mrs_emulation);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 7b7be71e87bf..5b22c687f02a 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -63,6 +63,8 @@ static const char *const hwcap_str[] = {
"atomics",
"fphp",
"asimdhp",
+ "cpuid",
+ "asimdrdm",
NULL
};
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index e88c064b845c..4e6ad355bd05 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -46,8 +46,7 @@ ENTRY(entry)
* efi_system_table_t *sys_table,
* unsigned long *image_addr) ;
*/
- adrp x8, _text
- add x8, x8, #:lo12:_text
+ adr_l x8, _text
add x2, sp, 16
str x8, [x2]
bl efi_entry
@@ -68,10 +67,8 @@ ENTRY(entry)
/*
* Calculate size of the kernel Image (same for original and copy).
*/
- adrp x1, _text
- add x1, x1, #:lo12:_text
- adrp x2, _edata
- add x2, x2, #:lo12:_edata
+ adr_l x1, _text
+ adr_l x2, _edata
sub x1, x2, x1
/*
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index aef02d2af3b5..e1be42e11ff5 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
@@ -98,8 +99,7 @@
ENTRY(_mcount)
mcount_enter
- adrp x0, ftrace_trace_function
- ldr x2, [x0, #:lo12:ftrace_trace_function]
+ ldr_l x2, ftrace_trace_function
adr x0, ftrace_stub
cmp x0, x2 // if (ftrace_trace_function
b.eq skip_ftrace_call // != ftrace_stub) {
@@ -115,15 +115,12 @@ skip_ftrace_call: // return;
mcount_exit // return;
// }
skip_ftrace_call:
- adrp x1, ftrace_graph_return
- ldr x2, [x1, #:lo12:ftrace_graph_return]
+ ldr_l x2, ftrace_graph_return
cmp x0, x2 // if ((ftrace_graph_return
b.ne ftrace_graph_caller // != ftrace_stub)
- adrp x1, ftrace_graph_entry // || (ftrace_graph_entry
- adrp x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub))
- ldr x2, [x1, #:lo12:ftrace_graph_entry]
- add x0, x0, #:lo12:ftrace_graph_entry_stub
+ ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry
+ adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub))
cmp x0, x2
b.ne ftrace_graph_caller // ftrace_graph_caller();
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 923841ffe4a9..43512d4d7df2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -683,7 +683,7 @@ el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mov x2, x25
- bl bad_mode
+ bl bad_el0_sync
b ret_to_user
ENDPROC(el0_sync)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4b1abac3485a..4fb6ccd886d1 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -149,7 +149,7 @@ extra_header_fields:
.quad 0 // SizeOfHeapReserve
.quad 0 // SizeOfHeapCommit
.long 0 // LoaderFlags
- .long 0x6 // NumberOfRvaAndSizes
+ .long (section_table - .) / 8 // NumberOfRvaAndSizes
.quad 0 // ExportTable
.quad 0 // ImportTable
@@ -158,6 +158,11 @@ extra_header_fields:
.quad 0 // CertificationTable
.quad 0 // BaseRelocationTable
+#ifdef CONFIG_DEBUG_EFI
+ .long efi_debug_table - _head // DebugTable
+ .long efi_debug_table_size
+#endif
+
// Section table
section_table:
@@ -195,6 +200,46 @@ section_table:
.short 0 // NumberOfLineNumbers (0 for executables)
.long 0xe0500020 // Characteristics (section flags)
+#ifdef CONFIG_DEBUG_EFI
+ /*
+ * The debug table is referenced via its Relative Virtual Address (RVA),
+ * which is only defined for those parts of the image that are covered
+ * by a section declaration. Since this header is not covered by any
+ * section, the debug table must be emitted elsewhere. So stick it in
+ * the .init.rodata section instead.
+ *
+ * Note that the EFI debug entry itself may legally have a zero RVA,
+ * which means we can simply put it right after the section headers.
+ */
+ __INITRODATA
+
+ .align 2
+efi_debug_table:
+ // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
+ .long 0 // Characteristics
+ .long 0 // TimeDateStamp
+ .short 0 // MajorVersion
+ .short 0 // MinorVersion
+ .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW
+ .long efi_debug_entry_size // SizeOfData
+ .long 0 // RVA
+ .long efi_debug_entry - _head // FileOffset
+
+ .set efi_debug_table_size, . - efi_debug_table
+ .previous
+
+efi_debug_entry:
+ // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
+ .ascii "NB10" // Signature
+ .long 0 // Unknown
+ .long 0 // Unknown2
+ .long 0 // Unknown3
+
+ .asciz VMLINUX_PATH
+
+ .set efi_debug_entry_size, . - efi_debug_entry
+#endif
+
/*
* EFI will load .text onwards at the 4k section alignment
* described in the PE/COFF header. To ensure that instruction
@@ -483,7 +528,7 @@ ENTRY(kimage_vaddr)
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
ENTRY(el2_setup)
@@ -592,15 +637,26 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
#endif
/* EL2 debug */
- mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
- sbfx x0, x0, #8, #4
+ mrs x1, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx x0, x1, #8, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
4:
- csel x0, xzr, x0, lt // all PMU counters from EL1
- msr mdcr_el2, x0 // (if they exist)
+ csel x3, xzr, x0, lt // all PMU counters from EL1
+
+ /* Statistical profiling */
+ ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer
+ cbz x0, 6f // Skip if SPE not present
+ cbnz x2, 5f // VHE?
+ mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ orr x3, x3, x1 // If we don't have VHE, then
+ b 6f // use EL1&0 translation.
+5: // For VHE, use EL2 translation
+ orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
+6:
+ msr mdcr_el2, x3 // Configure debug traps
/* Stage-2 translation */
msr vttbr_el2, xzr
@@ -613,8 +669,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
install_el2_stub:
/* Hypervisor stub */
- adrp x0, __hyp_stub_vectors
- add x0, x0, #:lo12:__hyp_stub_vectors
+ adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
@@ -628,7 +683,7 @@ ENDPROC(el2_setup)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
- * in x20. See arch/arm64/include/asm/virt.h for more info.
+ * in w0. See arch/arm64/include/asm/virt.h for more info.
*/
set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index fe301cbcb442..97a7384100f3 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -50,9 +50,6 @@
*/
extern int in_suspend;
-/* Find a symbols alias in the linear map */
-#define LMADDR(x) phys_to_virt(virt_to_phys(x))
-
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
@@ -102,8 +99,8 @@ static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
int pfn_is_nosave(unsigned long pfn)
{
- unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
- unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+ unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);
return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
}
@@ -125,12 +122,12 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
return -EOVERFLOW;
arch_hdr_invariants(&hdr->invariants);
- hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
+ hdr->ttbr1_el1 = __pa_symbol(swapper_pg_dir);
hdr->reenter_kernel = _cpu_resume;
/* We can't use __hyp_get_vectors() because kvm may still be loaded */
if (el2_reset_needed())
- hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+ hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
else
hdr->__hyp_stub_vectors = 0;
@@ -460,7 +457,6 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
- void *lm_restore_pblist;
phys_addr_t phys_hibernate_exit;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
@@ -472,7 +468,7 @@ int swsusp_arch_resume(void)
*/
tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!tmp_pg_dir) {
- pr_err("Failed to allocate memory for temporary page tables.");
+ pr_err("Failed to allocate memory for temporary page tables.\n");
rc = -ENOMEM;
goto out;
}
@@ -481,18 +477,12 @@ int swsusp_arch_resume(void)
goto out;
/*
- * Since we only copied the linear map, we need to find restore_pblist's
- * linear map address.
- */
- lm_restore_pblist = LMADDR(restore_pblist);
-
- /*
* We need a zero page that is zero before & after resume in order to
* to break before make on the ttbr1 page tables.
*/
zero_page = (void *)get_safe_page(GFP_ATOMIC);
if (!zero_page) {
- pr_err("Failed to allocate zero page.");
+ pr_err("Failed to allocate zero page.\n");
rc = -ENOMEM;
goto out;
}
@@ -512,7 +502,7 @@ int swsusp_arch_resume(void)
&phys_hibernate_exit,
(void *)get_safe_page, GFP_ATOMIC);
if (rc) {
- pr_err("Failed to create safe executable page for hibernate_exit code.");
+ pr_err("Failed to create safe executable page for hibernate_exit code.\n");
goto out;
}
@@ -537,7 +527,7 @@ int swsusp_arch_resume(void)
}
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
- resume_hdr.reenter_kernel, lm_restore_pblist,
+ resume_hdr.reenter_kernel, restore_pblist,
resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
out:
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 94b62c1fa4df..b6badff5a151 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -93,10 +93,10 @@ static void __kprobes *patch_map(void *addr, int fixmap)
bool module = !core_kernel_text(uintaddr);
struct page *page;
- if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+ if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
page = vmalloc_to_page(addr);
else if (!module)
- page = pfn_to_page(PHYS_PFN(__pa(addr)));
+ page = phys_to_page(__pa_symbol(addr));
else
return addr;
@@ -417,6 +417,35 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
return insn;
}
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+ u32 insn)
+{
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_REGTYPE_RT:
+ case AARCH64_INSN_REGTYPE_RD:
+ shift = 0;
+ break;
+ case AARCH64_INSN_REGTYPE_RN:
+ shift = 5;
+ break;
+ case AARCH64_INSN_REGTYPE_RT2:
+ case AARCH64_INSN_REGTYPE_RA:
+ shift = 10;
+ break;
+ case AARCH64_INSN_REGTYPE_RM:
+ shift = 16;
+ break;
+ default:
+ pr_err("%s: unknown register type encoding %d\n", __func__,
+ type);
+ return 0;
+ }
+
+ return (insn >> shift) & GENMASK(4, 0);
+}
+
static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
u32 insn,
enum aarch64_insn_register reg)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 357d3efe1366..be05868418ee 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -17,6 +17,8 @@
#include <linux/kernel.h>
#include <linux/kprobes.h>
+#include <asm/ptrace.h>
+
#include "simulate-insn.h"
#define bbl_displacement(insn) \
@@ -36,30 +38,22 @@
static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
{
- if (reg < 31)
- regs->regs[reg] = val;
+ pt_regs_write_reg(regs, reg, val);
}
static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val)
{
- if (reg < 31)
- regs->regs[reg] = lower_32_bits(val);
+ pt_regs_write_reg(regs, reg, lower_32_bits(val));
}
static inline u64 get_x_reg(struct pt_regs *regs, int reg)
{
- if (reg < 31)
- return regs->regs[reg];
- else
- return 0;
+ return pt_regs_read_reg(regs, reg);
}
static inline u32 get_w_reg(struct pt_regs *regs, int reg)
{
- if (reg < 31)
- return lower_32_bits(regs->regs[reg]);
- else
- return 0;
+ return lower_32_bits(pt_regs_read_reg(regs, reg));
}
static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a3a2816ba73a..1ad48f93abdd 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -339,7 +339,7 @@ static void entry_task_switch(struct task_struct *next)
/*
* Thread switching.
*/
-struct task_struct *__switch_to(struct task_struct *prev,
+__notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next)
{
struct task_struct *last;
@@ -407,7 +407,7 @@ unsigned long arch_align_stack(unsigned long sp)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
if (is_compat_task())
- return randomize_page(mm->brk, 0x02000000);
+ return randomize_page(mm->brk, SZ_32M);
else
- return randomize_page(mm->brk, 0x40000000);
+ return randomize_page(mm->brk, SZ_1G);
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 42816bebb1e0..e8edbf13302a 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -20,6 +20,7 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/psci.h>
+#include <linux/mm.h>
#include <uapi/linux/psci.h>
@@ -45,7 +46,7 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu)
static int cpu_psci_cpu_boot(unsigned int cpu)
{
- int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+ int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry));
if (err)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index fc35e06ccaac..a22161ccf447 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -551,6 +551,8 @@ static int hw_break_set(struct task_struct *target,
/* (address, ctrl) registers */
limit = regset->n * regset->size;
while (count && offset < limit) {
+ if (count < PTRACE_HBP_ADDR_SZ)
+ return -EINVAL;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr,
offset, offset + PTRACE_HBP_ADDR_SZ);
if (ret)
@@ -560,6 +562,8 @@ static int hw_break_set(struct task_struct *target,
return ret;
offset += PTRACE_HBP_ADDR_SZ;
+ if (!count)
+ break;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
offset, offset + PTRACE_HBP_CTRL_SZ);
if (ret)
@@ -596,7 +600,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_pt_regs newregs;
+ struct user_pt_regs newregs = task_pt_regs(target)->user_regs;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1);
if (ret)
@@ -626,7 +630,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_fpsimd_state newstate;
+ struct user_fpsimd_state newstate =
+ target->thread.fpsimd_state.user_fpsimd;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
if (ret)
@@ -650,7 +655,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- unsigned long tls;
+ unsigned long tls = target->thread.tp_value;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
if (ret)
@@ -676,7 +681,8 @@ static int system_call_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int syscallno, ret;
+ int syscallno = task_pt_regs(target)->syscallno;
+ int ret;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1);
if (ret)
@@ -948,7 +954,7 @@ static int compat_tls_set(struct task_struct *target,
const void __user *ubuf)
{
int ret;
- compat_ulong_t tls;
+ compat_ulong_t tls = target->thread.tp_value;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
if (ret)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b051367e2149..952e2c0dabd5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -42,6 +42,7 @@
#include <linux/of_fdt.h>
#include <linux/efi.h>
#include <linux/psci.h>
+#include <linux/mm.h>
#include <asm/acpi.h>
#include <asm/fixmap.h>
@@ -199,16 +200,16 @@ static void __init request_standard_resources(void)
struct memblock_region *region;
struct resource *res;
- kernel_code.start = virt_to_phys(_text);
- kernel_code.end = virt_to_phys(__init_begin - 1);
- kernel_data.start = virt_to_phys(_sdata);
- kernel_data.end = virt_to_phys(_end - 1);
+ kernel_code.start = __pa_symbol(_text);
+ kernel_code.end = __pa_symbol(__init_begin - 1);
+ kernel_data.start = __pa_symbol(_sdata);
+ kernel_data.end = __pa_symbol(_end - 1);
for_each_memblock(memory, region) {
res = alloc_bootmem_low(sizeof(*res));
if (memblock_is_nomap(region)) {
res->name = "reserved";
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_MEM;
} else {
res->name = "System RAM";
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
@@ -297,7 +298,7 @@ void __init setup_arch(char **cmdline_p)
* faults in case uaccess_enable() is inadvertently called by the init
* thread.
*/
- init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+ init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
#endif
#ifdef CONFIG_VT
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index ae0496fa4235..62522342e1e4 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -12,6 +12,7 @@
*
*/
#include <linux/linkage.h>
+#include <linux/arm-smccc.h>
#include <asm/asm-offsets.h>
.macro SMCCC instr
@@ -20,24 +21,32 @@
ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ret
+ ldr x4, [sp, #8]
+ cbz x4, 1f /* no quirk structure */
+ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
+ cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
+ b.ne 1f
+ str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+1: ret
.cfi_endproc
.endm
/*
* void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
* unsigned long a3, unsigned long a4, unsigned long a5,
- * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ * struct arm_smccc_quirk *quirk)
*/
-ENTRY(arm_smccc_smc)
+ENTRY(__arm_smccc_smc)
SMCCC smc
-ENDPROC(arm_smccc_smc)
+ENDPROC(__arm_smccc_smc)
/*
* void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
* unsigned long a3, unsigned long a4, unsigned long a5,
- * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
+ * struct arm_smccc_quirk *quirk)
*/
-ENTRY(arm_smccc_hvc)
+ENTRY(__arm_smccc_hvc)
SMCCC hvc
-ENDPROC(arm_smccc_hvc)
+ENDPROC(__arm_smccc_hvc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index cb87234cfcf2..a8ec5da530af 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -603,9 +603,9 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
*/
static void __init of_parse_and_init_cpus(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
- while ((dn = of_find_node_by_type(dn, "cpu"))) {
+ for_each_node_by_type(dn, "cpu") {
u64 hwid = of_get_cpu_mpidr(dn);
if (hwid == INVALID_HWID)
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 9a00eee9acc8..93034651c87e 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -21,6 +21,7 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/types.h>
+#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
@@ -98,7 +99,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
* boot-loader's endianess before jumping. This is mandated by
* the boot protocol.
*/
- writeq_relaxed(__pa(secondary_holding_pen), release_addr);
+ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
__flush_dcache_area((__force void *)release_addr,
sizeof(*release_addr));
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 23e9e13bd2aa..565dd69888cc 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,6 +11,7 @@
* for more details.
*/
+#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/init.h>
@@ -40,7 +41,6 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}
-#ifdef CONFIG_PROC_SYSCTL
static ssize_t cpu_capacity_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -97,7 +97,6 @@ static int register_cpu_capacity_sysctl(void)
return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);
-#endif
static u32 capacity_scale;
static u32 *raw_capacity;
@@ -209,7 +208,12 @@ static struct notifier_block init_cpu_capacity_notifier = {
static int __init register_cpufreq_notifier(void)
{
- if (cap_parsing_failed)
+ /*
+ * on ACPI-based systems we need to use the default cpu capacity
+ * until we have the necessary code to parse the cpu capacity, so
+ * skip registering cpufreq notifier.
+ */
+ if (!acpi_disabled || cap_parsing_failed)
return -EINVAL;
if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 5b830be79c01..7d47c2cdfd93 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -466,7 +466,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
int ret = 0;
- address = (rt == 31) ? 0 : regs->regs[rt];
+ address = pt_regs_read_reg(regs, rt);
switch (crm) {
case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */
@@ -495,8 +495,10 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
{
int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+ unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
+
+ pt_regs_write_reg(regs, rt, val);
- regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val;
regs->pc += 4;
}
@@ -531,7 +533,12 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
return;
}
- force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+ /*
+ * New SYS instructions may previously have been undefined at EL0. Fall
+ * back to our usual undefined instruction handler so that we handle
+ * these consistently.
+ */
+ do_undefinstr(regs);
}
long compat_arm_syscall(struct pt_regs *regs);
@@ -604,17 +611,34 @@ const char *esr_get_class_string(u32 esr)
}
/*
- * bad_mode handles the impossible case in the exception vector.
+ * bad_mode handles the impossible case in the exception vector. This is always
+ * fatal.
*/
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
- siginfo_t info;
- void __user *pc = (void __user *)instruction_pointer(regs);
console_verbose();
pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
handler[reason], smp_processor_id(), esr,
esr_get_class_string(esr));
+
+ die("Oops - bad mode", regs, 0);
+ local_irq_disable();
+ panic("bad mode");
+}
+
+/*
+ * bad_el0_sync handles unexpected, but potentially recoverable synchronous
+ * exceptions taken from EL0. Unlike bad_mode, this returns.
+ */
+asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+{
+ siginfo_t info;
+ void __user *pc = (void __user *)instruction_pointer(regs);
+ console_verbose();
+
+ pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n",
+ smp_processor_id(), esr, esr_get_class_string(esr));
__show_regs(regs);
info.si_signo = SIGILL;
@@ -622,7 +646,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
info.si_code = ILL_ILLOPC;
info.si_addr = pc;
- arm64_notify_die("Oops - bad mode", regs, &info, 0);
+ current->thread.fault_address = 0;
+ current->thread.fault_code = 0;
+
+ force_sig_info(info.si_signo, &info, current);
}
void __pte_error(const char *file, int line, unsigned long val)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a2c2478e7d78..41b6e31f8f55 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -123,6 +123,7 @@ static int __init vdso_init(void)
{
int i;
struct page **vdso_pagelist;
+ unsigned long pfn;
if (memcmp(&vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
@@ -140,11 +141,14 @@ static int __init vdso_init(void)
return -ENOMEM;
/* Grab the vDSO data page. */
- vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
+ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
/* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(&vdso_start);
+
for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
+ vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
vdso_spec[0].pages = &vdso_pagelist[0];
vdso_spec[1].pages = &vdso_pagelist[1];
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d50a82a16ff6..afd51bebb9c5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-ccflags-y += -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic
CFLAGS_arm.o := -I.
CFLAGS_mmu.o := -I.
@@ -19,6 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
@@ -31,6 +32,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 47e5f0feaee8..dbadfaf850a7 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -95,6 +95,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
* - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
* - Debug ROM Address (MDCR_EL2_TDRA)
* - OS related registers (MDCR_EL2_TDOSA)
+ * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
*
* Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
@@ -110,8 +111,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+ /*
+ * This also clears MDCR_EL2_E2PB_MASK to disable guest access
+ * to the profiling buffer.
+ */
vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+ MDCR_EL2_TPMS |
MDCR_EL2_TPMCR |
MDCR_EL2_TDRA |
MDCR_EL2_TDOSA);
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 4ba5c9095d03..f5154ed3da6c 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -65,6 +65,66 @@
default: write_debug(ptr[0], reg, 0); \
}
+#define PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
+
+#define PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
+#define PMBLIMITR_EL1_E BIT(0)
+
+#define PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
+#define PMBIDR_EL1_P BIT(4)
+
+#define psb_csync() asm volatile("hint #17")
+
+static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
+{
+ /* The vcpu can run. but it can't hide. */
+}
+
+static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
+{
+ u64 reg;
+
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
+ return;
+
+ /* Yes; is it owned by EL3? */
+ reg = read_sysreg_s(PMBIDR_EL1);
+ if (reg & PMBIDR_EL1_P)
+ return;
+
+ /* No; is the host actually using the thing? */
+ reg = read_sysreg_s(PMBLIMITR_EL1);
+ if (!(reg & PMBLIMITR_EL1_E))
+ return;
+
+ /* Yes; save the control register and disable data generation */
+ *pmscr_el1 = read_sysreg_s(PMSCR_EL1);
+ write_sysreg_s(0, PMSCR_EL1);
+ isb();
+
+ /* Now drain all buffered data to memory */
+ psb_csync();
+ dsb(nsh);
+}
+
+static hyp_alternate_select(__debug_save_spe,
+ __debug_save_spe_nvhe, __debug_save_spe_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+{
+ if (!pmscr_el1)
+ return;
+
+ /* The host page table is installed, but not yet synchronised */
+ isb();
+
+ /* Re-enable data generation */
+ write_sysreg_s(pmscr_el1, PMSCR_EL1);
+}
+
void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
@@ -118,13 +178,15 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
(vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
- __debug_save_state(vcpu, &vcpu->arch.host_debug_state,
+ __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
kern_hyp_va(vcpu->arch.host_cpu_context));
+ __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
}
void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
{
- __debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
+ __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
kern_hyp_va(vcpu->arch.host_cpu_context));
if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 75e83dd40d43..aede1658aeda 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -103,7 +103,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
static void __hyp_text __deactivate_traps_vhe(void)
{
extern char vectors[]; /* kernel exception vectors */
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -111,6 +117,12 @@ static void __hyp_text __deactivate_traps_vhe(void)
static void __hyp_text __deactivate_traps_nvhe(void)
{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+ mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_RW, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
}
@@ -132,7 +144,6 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
__deactivate_traps_arch()();
write_sysreg(0, hstr_el2);
- write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
write_sysreg(0, pmuserenr_el0);
}
@@ -357,6 +368,10 @@ again:
}
__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+ /*
+ * This must come after restoring the host sysregs, since a non-VHE
+ * system may enable SPE here and make use of the TTBRs.
+ */
__debug_cond_restore_host_state(vcpu);
return exit_code;
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 88e2f2b938f0..e8e7ba2bc11f 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
*/
#include <asm/kvm_hyp.h>
+#include <asm/tlbflush.h>
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
@@ -32,7 +33,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
* whole of Stage-1. Weep...
*/
ipa >>= 12;
- asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
+ __tlbi(ipas2e1is, ipa);
/*
* We have to ensure completion of the invalidation at Stage-2,
@@ -41,7 +42,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
* the Stage-1 invalidation happened first.
*/
dsb(ish);
- asm volatile("tlbi vmalle1is" : : );
+ __tlbi(vmalle1is);
dsb(ish);
isb();
@@ -57,7 +58,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
- asm volatile("tlbi vmalls12e1is" : : );
+ __tlbi(vmalls12e1is);
dsb(ish);
isb();
@@ -72,7 +73,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
- asm volatile("tlbi vmalle1" : : );
+ __tlbi(vmalle1);
dsb(nsh);
isb();
@@ -82,7 +83,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
void __hyp_text __kvm_flush_vm_context(void)
{
dsb(ishst);
- asm volatile("tlbi alle1is \n"
- "ic ialluis ": : );
+ __tlbi(alle1is);
+ asm volatile("ic ialluis" : : );
dsb(ish);
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e95d4f68bf54..d9e9697de1b2 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,6 +46,11 @@ static const struct kvm_regs default_regs_reset32 = {
COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
};
+static const struct kvm_irq_level default_ptimer_irq = {
+ .irq = 30,
+ .level = 1,
+};
+
static const struct kvm_irq_level default_vtimer_irq = {
.irq = 27,
.level = 1,
@@ -104,6 +109,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
const struct kvm_irq_level *cpu_vtimer_irq;
+ const struct kvm_irq_level *cpu_ptimer_irq;
const struct kvm_regs *cpu_reset;
switch (vcpu->arch.target) {
@@ -117,6 +123,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
cpu_vtimer_irq = &default_vtimer_irq;
+ cpu_ptimer_irq = &default_ptimer_irq;
break;
}
@@ -130,5 +137,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_reset(vcpu);
/* Reset timer */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 87e7e6608cd8..0e26f8c2b56f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -820,6 +820,61 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
+static bool access_cntp_tval(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ u64 now = kvm_phys_timer_read();
+
+ if (p->is_write)
+ ptimer->cnt_cval = p->regval + now;
+ else
+ p->regval = ptimer->cnt_cval - now;
+
+ return true;
+}
+
+static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (p->is_write) {
+ /* ISTATUS bit is read-only */
+ ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT;
+ } else {
+ u64 now = kvm_phys_timer_read();
+
+ p->regval = ptimer->cnt_ctl;
+ /*
+ * Set ISTATUS bit if it's expired.
+ * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
+ * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
+ * regardless of ENABLE bit for our implementation convenience.
+ */
+ if (ptimer->cnt_cval <= now)
+ p->regval |= ARCH_TIMER_CTRL_IT_STAT;
+ }
+
+ return true;
+}
+
+static bool access_cntp_cval(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (p->is_write)
+ ptimer->cnt_cval = p->regval;
+ else
+ p->regval = ptimer->cnt_cval;
+
+ return true;
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -1029,6 +1084,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
NULL, reset_unknown, TPIDRRO_EL0 },
+ /* CNTP_TVAL_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000),
+ access_cntp_tval },
+ /* CNTP_CTL_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001),
+ access_cntp_ctl },
+ /* CNTP_CVAL_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010),
+ access_cntp_cval },
+
/* PMEVCNTRn_EL0 */
PMU_PMEVCNTR_EL0(0),
PMU_PMEVCNTR_EL0(1),
@@ -1795,6 +1860,17 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
}
}
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+ if (!index_to_params(id, params))
+ return NULL;
+
+ return find_reg(params, table, num);
+}
+
/* Decode an index value, and find the sys_reg_desc entry. */
static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
u64 id)
@@ -1807,11 +1883,8 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
return NULL;
- if (!index_to_params(id, &params))
- return NULL;
-
table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg(&params, table, num);
+ r = find_reg_by_id(id, &params, table, num);
if (!r)
r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
@@ -1918,10 +1991,8 @@ static int get_invariant_sys_reg(u64 id, void __user *uaddr)
struct sys_reg_params params;
const struct sys_reg_desc *r;
- if (!index_to_params(id, &params))
- return -ENOENT;
-
- r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ r = find_reg_by_id(id, &params, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
@@ -1935,9 +2006,8 @@ static int set_invariant_sys_reg(u64 id, void __user *uaddr)
int err;
u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
- if (!index_to_params(id, &params))
- return -ENOENT;
- r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ r = find_reg_by_id(id, &params, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index dbbb01cfbee9..9c6ffd0f0196 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -136,6 +136,10 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
return i1->Op2 - i2->Op2;
}
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num);
#define Op0(_x) .Op0 = _x
#define Op1(_x) .Op1 = _x
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
new file mode 100644
index 000000000000..79f37e37d367
--- /dev/null
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -0,0 +1,346 @@
+/*
+ * VGIC system registers handling functions for AArch64 mode
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include "vgic.h"
+#include "sys_regs.h"
+
+static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v;
+ struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_vmcr vmcr;
+ u64 val;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ val = p->regval;
+
+ /*
+ * Disallow restoring VM state if not supported by this
+ * hardware.
+ */
+ host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >>
+ ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
+ if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
+ return false;
+
+ vgic_v3_cpu->num_pri_bits = host_pri_bits;
+
+ host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >>
+ ICC_CTLR_EL1_ID_BITS_SHIFT;
+ if (host_id_bits > vgic_v3_cpu->num_id_bits)
+ return false;
+
+ vgic_v3_cpu->num_id_bits = host_id_bits;
+
+ host_seis = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT);
+ seis = (val & ICC_CTLR_EL1_SEIS_MASK) >>
+ ICC_CTLR_EL1_SEIS_SHIFT;
+ if (host_seis != seis)
+ return false;
+
+ host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT);
+ a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT;
+ if (host_a3v != a3v)
+ return false;
+
+ /*
+ * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
+ * The vgic_set_vmcr() will convert to ICH_VMCR layout.
+ */
+ vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
+ vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ val = 0;
+ val |= (vgic_v3_cpu->num_pri_bits - 1) <<
+ ICC_CTLR_EL1_PRI_BITS_SHIFT;
+ val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ val |= ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) <<
+ ICC_CTLR_EL1_SEIS_SHIFT;
+ val |= ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) <<
+ ICC_CTLR_EL1_A3V_SHIFT;
+ /*
+ * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
+ * Extract it directly using ICC_CTLR_EL1 reg definitions.
+ */
+ val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
+ val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
+
+ p->regval = val;
+ }
+
+ return true;
+}
+
+static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >>
+ ICC_BPR0_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) &
+ ICC_BPR0_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ if (!p->is_write)
+ p->regval = 0;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
+ if (p->is_write) {
+ vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
+ ICC_BPR1_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) &
+ ICC_BPR1_EL1_MASK;
+ }
+ } else {
+ if (!p->is_write)
+ p->regval = min((vmcr.bpr + 1), 7U);
+ }
+
+ return true;
+}
+
+static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >>
+ ICC_IGRPEN0_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) &
+ ICC_IGRPEN0_EL1_MASK;
+ }
+
+ return true;
+}
+
+static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ if (p->is_write) {
+ vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >>
+ ICC_IGRPEN1_EL1_SHIFT;
+ vgic_set_vmcr(vcpu, &vmcr);
+ } else {
+ p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) &
+ ICC_IGRPEN1_EL1_MASK;
+ }
+
+ return true;
+}
+
+static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p, u8 apr, u8 idx)
+{
+ struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+ uint32_t *ap_reg;
+
+ if (apr)
+ ap_reg = &vgicv3->vgic_ap1r[idx];
+ else
+ ap_reg = &vgicv3->vgic_ap0r[idx];
+
+ if (p->is_write)
+ *ap_reg = p->regval;
+ else
+ p->regval = *ap_reg;
+}
+
+static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r, u8 apr)
+{
+ struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
+ u8 idx = r->Op2 & 3;
+
+ /*
+ * num_pri_bits are initialized with HW supported values.
+ * We can rely safely on num_pri_bits even if VM has not
+ * restored ICC_CTLR_EL1 before restoring APnR registers.
+ */
+ switch (vgic_v3_cpu->num_pri_bits) {
+ case 7:
+ vgic_v3_access_apr_reg(vcpu, p, apr, idx);
+ break;
+ case 6:
+ if (idx > 1)
+ goto err;
+ vgic_v3_access_apr_reg(vcpu, p, apr, idx);
+ break;
+ default:
+ if (idx > 0)
+ goto err;
+ vgic_v3_access_apr_reg(vcpu, p, apr, idx);
+ }
+
+ return true;
+err:
+ if (!p->is_write)
+ p->regval = 0;
+
+ return false;
+}
+
+static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+
+{
+ return access_gic_aprn(vcpu, p, r, 0);
+}
+
+static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return access_gic_aprn(vcpu, p, r, 1);
+}
+
+static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /* Validate SRE bit */
+ if (p->is_write) {
+ if (!(p->regval & ICC_SRE_EL1_SRE))
+ return false;
+ } else {
+ p->regval = vgicv3->vgic_sre;
+ }
+
+ return true;
+}
+static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
+ /* ICC_PMR_EL1 */
+ { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr },
+ /* ICC_BPR0_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 },
+ /* ICC_AP0R0_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r },
+ /* ICC_AP0R1_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r },
+ /* ICC_AP0R2_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r },
+ /* ICC_AP0R3_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r },
+ /* ICC_AP1R0_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r },
+ /* ICC_AP1R1_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r },
+ /* ICC_AP1R2_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r },
+ /* ICC_AP1R3_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r },
+ /* ICC_BPR1_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 },
+ /* ICC_CTLR_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr },
+ /* ICC_SRE_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre },
+ /* ICC_IGRPEN0_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 },
+ /* ICC_GRPEN1_EL1 */
+ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 },
+};
+
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg)
+{
+ struct sys_reg_params params;
+ u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
+
+ params.regval = *reg;
+ params.is_write = is_write;
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+
+ if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs)))
+ return 0;
+
+ return -ENXIO;
+}
+
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+ u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
+
+ if (is_write)
+ params.regval = *reg;
+ params.is_write = is_write;
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+
+ r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
+ if (!r)
+ return -ENXIO;
+
+ if (!r->access(vcpu, &params, r))
+ return -EINVAL;
+
+ if (!is_write)
+ *reg = params.regval;
+
+ return 0;
+}
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index e703fb9defad..9b0ba191e48e 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -6,6 +6,8 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o
obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
+KASAN_SANITIZE_physaddr.o += n
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 4c63cb154859..68634c630cdd 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -79,6 +79,13 @@ void verify_cpu_asid_bits(void)
}
}
+static void set_reserved_asid_bits(void)
+{
+ if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
+ __set_bit(FALKOR_RESERVED_ASID, asid_map);
+}
+
static void flush_context(unsigned int cpu)
{
int i;
@@ -87,6 +94,8 @@ static void flush_context(unsigned int cpu)
/* Update the list of reserved ASIDs and the ASID bitmap. */
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+ set_reserved_asid_bits();
+
/*
* Ensure the generation bump is observed before we xchg the
* active_asids.
@@ -244,6 +253,8 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
+ set_reserved_asid_bits();
+
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index e04082700bb1..351f7595cb3e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -211,7 +211,8 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
dma_addr_t dev_addr;
dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
return dev_addr;
@@ -222,7 +223,8 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}
@@ -235,7 +237,8 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
int i, ret;
ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
for_each_sg(sgl, sg, ret, i)
__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
@@ -251,7 +254,8 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev,
struct scatterlist *sg;
int i;
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
for_each_sg(sgl, sg, nelems, i)
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
@@ -352,6 +356,13 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
return 1;
}
+static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
+{
+ if (swiotlb)
+ return swiotlb_dma_mapping_error(hwdev, addr);
+ return 0;
+}
+
static struct dma_map_ops swiotlb_dma_ops = {
.alloc = __dma_alloc,
.free = __dma_free,
@@ -366,7 +377,7 @@ static struct dma_map_ops swiotlb_dma_ops = {
.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = __swiotlb_sync_sg_for_device,
.dma_supported = __swiotlb_dma_supported,
- .mapping_error = swiotlb_dma_mapping_error,
+ .mapping_error = __swiotlb_dma_mapping_error,
};
static int __init atomic_pool_init(void)
@@ -558,7 +569,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
unsigned long attrs)
{
bool coherent = is_device_dma_coherent(dev);
- int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+ int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
size_t iosize = size;
void *addr;
@@ -712,7 +723,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
bool coherent = is_device_dma_coherent(dev);
- int prot = dma_direction_to_prot(dir, coherent);
+ int prot = dma_info_to_prot(dir, coherent, attrs);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
if (!iommu_dma_mapping_error(dev, dev_addr) &&
@@ -770,7 +781,7 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
return iommu_dma_map_sg(dev, sgl, nelems,
- dma_direction_to_prot(dir, coherent));
+ dma_info_to_prot(dir, coherent, attrs));
}
static void __iommu_unmap_sg_attrs(struct device *dev,
@@ -799,7 +810,6 @@ static struct dma_map_ops iommu_dma_ops = {
.sync_sg_for_device = __iommu_sync_sg_for_device,
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
- .dma_supported = iommu_dma_supported,
.mapping_error = iommu_dma_mapping_error,
};
@@ -831,14 +841,21 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
* then the IOMMU core will have already configured a group for this
* device, and allocated the default domain for that group.
*/
- if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
- pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
- dev_name(dev));
- return false;
+ if (!domain)
+ goto out_err;
+
+ if (domain->type == IOMMU_DOMAIN_DMA) {
+ if (iommu_dma_init_domain(domain, dma_base, size, dev))
+ goto out_err;
+
+ dev->archdata.dma_ops = &iommu_dma_ops;
}
- dev->archdata.dma_ops = &iommu_dma_ops;
return true;
+out_err:
+ pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+ dev_name(dev));
+ return false;
}
static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 156169c6981b..81283851c9af 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -691,17 +691,3 @@ int cpu_enable_pan(void *__unused)
return 0;
}
#endif /* CONFIG_ARM64_PAN */
-
-#ifdef CONFIG_ARM64_UAO
-/*
- * Kernel threads have fs=KERNEL_DS by default, and don't need to call
- * set_fs(), devtmpfs in particular relies on this behaviour.
- * We need to enable the feature at runtime (instead of adding it to
- * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
- */
-int cpu_enable_uao(void *__unused)
-{
- asm(SET_PSTATE_UAO(1));
- return 0;
-}
-#endif /* CONFIG_ARM64_UAO */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 964b7549af5c..e25584d72396 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -239,7 +239,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
*cpte, &pgsize);
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
- changed = ptep_set_access_flags(vma, addr, cpte,
+ changed |= ptep_set_access_flags(vma, addr, cpte,
pfn_pte(pfn,
hugeprot),
dirty);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 716d1226ba69..e19e06593e37 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -209,8 +210,8 @@ void __init arm64_memblock_init(void)
* linear mapping. Take care not to clip the kernel which may be
* high in memory.
*/
- memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
- ULLONG_MAX);
+ memblock_remove(max_t(u64, memstart_addr + linear_region_size,
+ __pa_symbol(_end)), ULLONG_MAX);
if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
/* ensure that memstart_addr remains sufficiently aligned */
memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
@@ -225,7 +226,7 @@ void __init arm64_memblock_init(void)
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
memblock_mem_limit_remove_map(memory_limit);
- memblock_add(__pa(_text), (u64)(_end - _text));
+ memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
@@ -278,7 +279,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
- memblock_reserve(__pa(_text), _end - _text);
+ memblock_reserve(__pa_symbol(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start) {
memblock_reserve(initrd_start, initrd_end - initrd_start);
@@ -404,6 +405,8 @@ void __init mem_init(void)
if (swiotlb_force == SWIOTLB_FORCE ||
max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
swiotlb_init(1);
+ else
+ swiotlb_force = SWIOTLB_NO_FORCE;
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
@@ -484,7 +487,8 @@ void __init mem_init(void)
void free_initmem(void)
{
- free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
+ free_reserved_area(lm_alias(__init_begin),
+ lm_alias(__init_end),
0, "unused kernel");
/*
* Unmap the __init region but leave the VM area in place. This
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 01e88c8bcab0..c4c8cd4c31d4 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -88,7 +88,7 @@ void __iounmap(volatile void __iomem *io_addr)
* We could get an address outside vmalloc range in case
* of ioremap_cache() reusing a RAM mapping.
*/
- if (VMALLOC_START <= addr && addr < VMALLOC_END)
+ if (is_vmalloc_addr((void *)addr))
vunmap((void *)addr);
}
EXPORT_SYMBOL(__iounmap);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 757009daa9ed..201d918e7575 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/start_kernel.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/kernel-pgtable.h>
@@ -26,6 +27,13 @@
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). All the early functions are called too
+ * early to use lm_alias so __p*d_populate functions must be used to populate
+ * with the physical address from __pa_symbol.
+ */
+
static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long end)
{
@@ -33,12 +41,12 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long next;
if (pmd_none(*pmd))
- pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+ __pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE);
pte = pte_offset_kimg(pmd, addr);
do {
next = addr + PAGE_SIZE;
- set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+ set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page),
PAGE_KERNEL));
} while (pte++, addr = next, addr != end && pte_none(*pte));
}
@@ -51,7 +59,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
unsigned long next;
if (pud_none(*pud))
- pud_populate(&init_mm, pud, kasan_zero_pmd);
+ __pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE);
pmd = pmd_offset_kimg(pud, addr);
do {
@@ -68,7 +76,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
unsigned long next;
if (pgd_none(*pgd))
- pgd_populate(&init_mm, pgd, kasan_zero_pud);
+ __pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE);
pud = pud_offset_kimg(pgd, addr);
do {
@@ -148,7 +156,7 @@ void __init kasan_init(void)
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
dsb(ishst);
- cpu_replace_ttbr1(tmp_pg_dir);
+ cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -199,10 +207,10 @@ void __init kasan_init(void)
*/
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_zero_pte[i],
- pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+ pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
memset(kasan_zero_page, 0, PAGE_SIZE);
- cpu_replace_ttbr1(swapper_pg_dir);
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 17243e43184e..b805c017f789 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,6 +28,7 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
+#include <linux/mm.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -359,8 +360,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
{
- unsigned long kernel_start = __pa(_text);
- unsigned long kernel_end = __pa(__init_begin);
+ phys_addr_t kernel_start = __pa_symbol(_text);
+ phys_addr_t kernel_end = __pa_symbol(__init_begin);
/*
* Take care not to create a writable alias for the
@@ -427,14 +428,14 @@ void mark_rodata_ro(void)
unsigned long section_size;
section_size = (unsigned long)_etext - (unsigned long)_text;
- create_mapping_late(__pa(_text), (unsigned long)_text,
+ create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
section_size, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use __init_begin rather than __end_rodata
* to cover NOTES and EXCEPTION_TABLE.
*/
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
- create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+ create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
/* flush the TLBs after updating live kernel mappings */
@@ -446,7 +447,7 @@ void mark_rodata_ro(void)
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma)
{
- phys_addr_t pa_start = __pa(va_start);
+ phys_addr_t pa_start = __pa_symbol(va_start);
unsigned long size = va_end - va_start;
BUG_ON(!PAGE_ALIGNED(pa_start));
@@ -494,7 +495,7 @@ static void __init map_kernel(pgd_t *pgd)
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
- __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+ __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
pud_clear_fixmap();
} else {
BUG();
@@ -524,8 +525,8 @@ void __init paging_init(void)
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
- cpu_replace_ttbr1(swapper_pg_dir);
+ memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
memblock_free(pgd_phys, PAGE_SIZE);
@@ -534,7 +535,7 @@ void __init paging_init(void)
* We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
* allocated with it.
*/
- memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+ memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
SWAPPER_DIR_SIZE - PAGE_SIZE);
}
@@ -645,6 +646,12 @@ static inline pte_t * fixmap_pte(unsigned long addr)
return &bm_pte[pte_index(addr)];
}
+/*
+ * The p*d_populate functions call virt_to_phys implicitly so they can't be used
+ * directly on kernel symbols (bm_p*d). This function is called too early to use
+ * lm_alias so __p*d_populate functions must be used to populate with the
+ * physical address from __pa_symbol.
+ */
void __init early_fixmap_init(void)
{
pgd_t *pgd;
@@ -654,7 +661,7 @@ void __init early_fixmap_init(void)
pgd = pgd_offset_k(addr);
if (CONFIG_PGTABLE_LEVELS > 3 &&
- !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+ !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
@@ -663,12 +670,14 @@ void __init early_fixmap_init(void)
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pud = pud_offset_kimg(pgd, addr);
} else {
- pgd_populate(&init_mm, pgd, bm_pud);
+ if (pgd_none(*pgd))
+ __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
pud = fixmap_pud(addr);
}
- pud_populate(&init_mm, pud, bm_pmd);
+ if (pud_none(*pud))
+ __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
pmd = fixmap_pmd(addr);
- pmd_populate_kernel(&init_mm, pmd, bm_pte);
+ __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
new file mode 100644
index 000000000000..91371daf397c
--- /dev/null
+++ b/arch/arm64/mm/physaddr.c
@@ -0,0 +1,30 @@
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+
+#include <asm/memory.h>
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+ WARN(!__is_lm_address(x),
+ "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ (void *)x,
+ (void *)x);
+
+ return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+ /*
+ * This is bounds checking against the kernel image only.
+ * __pa_symbol should only be used on kernel symbol addresses.
+ */
+ VIRTUAL_BUG_ON(x < (unsigned long) KERNEL_START ||
+ x > (unsigned long) KERNEL_END);
+ return __pa_symbol_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 32682be978e0..cd4d53d7e458 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -138,6 +138,7 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
+ pre_ttbr0_update_workaround x0, x1, x2
mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index b2fc97a2c56c..a785554916c0 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -813,11 +813,6 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
-void bpf_jit_compile(struct bpf_prog *prog)
-{
- /* Nothing to do here. We support Internal BPF. */
-}
-
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;
@@ -903,7 +898,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_flush_icache(header, ctx.image + ctx.idx);
- set_memory_ro((unsigned long)header, header->pages);
+ bpf_jit_binary_lock_ro(header);
prog->bpf_func = (void *)ctx.image;
prog->jited = 1;
@@ -915,18 +910,3 @@ out:
tmp : orig_prog);
return prog;
}
-
-void bpf_jit_free(struct bpf_prog *prog)
-{
- unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
- struct bpf_binary_header *header = (void *)addr;
-
- if (!prog->jited)
- goto free_filter;
-
- set_memory_rw(addr, header->pages);
- bpf_jit_binary_free(header);
-
-free_filter:
- bpf_prog_unlock_free(prog);
-}
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 947830a459d2..401ceb71540c 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -84,6 +84,7 @@ HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
HYPERCALL2(vm_assist);
+HYPERCALL3(dm_op);
ENTRY(privcmd_call)
mov x16, x0