diff options
Diffstat (limited to 'arch')
138 files changed, 1378 insertions, 861 deletions
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index d5b3ed2c58f5..c380d8c30704 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -90,10 +90,12 @@ static void show_faulting_vma(unsigned long address) */ if (vma) { char buf[ARC_PATH_MAX]; - char *nm = "?"; + char *nm = "anon"; if (vma->vm_file) { - nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1); + /* XXX: can we use %pD below and get rid of buf? */ + nm = d_path(file_user_path(vma->vm_file), buf, + ARC_PATH_MAX-1); if (IS_ERR(nm)) nm = "?"; } diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index b63bd4ad3143..88a4b0d6d928 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -64,7 +64,8 @@ compatible = "arm,armv7-timer"; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, - <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; arm,cpu-registers-not-fw-configured; clock-frequency = <24000000>; }; @@ -233,7 +234,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044000 0x20>; interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER0>; clock-names = "pclk", "timer"; }; @@ -241,7 +242,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044020 0x20>; interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER1>; clock-names = "pclk", "timer"; }; @@ -249,7 +250,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044040 0x20>; interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER2>; clock-names = "pclk", "timer"; }; @@ -257,7 +258,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044060 0x20>; interrupts = <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER3>; clock-names = "pclk", "timer"; }; @@ -265,7 +266,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044080 0x20>; interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER4>; clock-names = "pclk", "timer"; }; @@ -273,7 +274,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x200440a0 0x20>; interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER5>; clock-names = "pclk", "timer"; }; @@ -426,7 +427,7 @@ i2c0: i2c@20072000 { compatible = "rockchip,rk3128-i2c", "rockchip,rk3288-i2c"; - reg = <20072000 0x1000>; + reg = <0x20072000 0x1000>; interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; clock-names = "i2c"; clocks = <&cru PCLK_I2C0>; @@ -458,6 +459,7 @@ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>; arm,pl330-broken-no-flushp; + arm,pl330-periph-burst; clocks = <&cru ACLK_DMAC>; clock-names = "apb_pclk"; #dma-cells = <1>; diff --git a/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi b/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi index 7ae8b620515c..59f546a278f8 100644 --- a/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi @@ -109,6 +109,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49022000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP1_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; @@ -142,6 +144,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49024000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP2_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; @@ -175,6 +179,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49026000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP3_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi b/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi index 46b8f9efd413..3fcef3080eae 100644 --- a/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi @@ -2043,6 +2043,8 @@ compatible = "ti,omap4-mcbsp"; reg = <0x0 0xff>; /* L4 Interconnect */ reg-names = "mpu"; + clocks = <&l4_per_clkctrl OMAP4_MCBSP4_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi b/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi index a03bca5a3584..97b0c3b5f573 100644 --- a/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi @@ -109,6 +109,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49022000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP1_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; @@ -142,6 +144,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49024000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP2_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; @@ -175,6 +179,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49026000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP3_CLKCTRL 24>; + clock-names = "fck"; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 9808cd27e2cf..67de96c7717d 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -550,6 +550,7 @@ static struct platform_device *ams_delta_devices[] __initdata = { &ams_delta_nand_device, &ams_delta_lcd_device, &cx20442_codec_device, + &modem_nreset_device, }; static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = { @@ -782,26 +783,28 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = { { }, }; +static int ams_delta_modem_pm_activate(struct device *dev) +{ + modem_priv.regulator = regulator_get(dev, "RESET#"); + if (IS_ERR(modem_priv.regulator)) + return -EPROBE_DEFER; + + return 0; +} + +static struct dev_pm_domain ams_delta_modem_pm_domain = { + .activate = ams_delta_modem_pm_activate, +}; + static struct platform_device ams_delta_modem_device = { .name = "serial8250", .id = PLAT8250_DEV_PLATFORM1, .dev = { .platform_data = ams_delta_modem_ports, + .pm_domain = &ams_delta_modem_pm_domain, }, }; -static int __init modem_nreset_init(void) -{ - int err; - - err = platform_device_register(&modem_nreset_device); - if (err) - pr_err("Couldn't register the modem regulator device\n"); - - return err; -} - - /* * This function expects MODEM IRQ number already assigned to the port. * The MODEM device requires its RESET# pin kept high during probe. @@ -833,37 +836,6 @@ static int __init ams_delta_modem_init(void) } arch_initcall_sync(ams_delta_modem_init); -static int __init late_init(void) -{ - int err; - - err = modem_nreset_init(); - if (err) - return err; - - /* - * Once the modem device is registered, the modem_nreset - * regulator can be requested on behalf of that device. - */ - modem_priv.regulator = regulator_get(&ams_delta_modem_device.dev, - "RESET#"); - if (IS_ERR(modem_priv.regulator)) { - err = PTR_ERR(modem_priv.regulator); - goto unregister; - } - return 0; - -unregister: - platform_device_unregister(&ams_delta_modem_device); - return err; -} - -static void __init ams_delta_init_late(void) -{ - omap1_init_late(); - late_init(); -} - static void __init ams_delta_map_io(void) { omap1_map_io(); @@ -877,7 +849,7 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)") .init_early = omap1_init_early, .init_irq = omap1_init_irq, .init_machine = ams_delta_init, - .init_late = ams_delta_init_late, + .init_late = omap1_init_late, .init_time = omap1_timer_init, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c index 410d17d1d443..f618a6df2938 100644 --- a/arch/arm/mach-omap1/timer32k.c +++ b/arch/arm/mach-omap1/timer32k.c @@ -176,17 +176,18 @@ static u64 notrace omap_32k_read_sched_clock(void) return sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0; } +static struct timespec64 persistent_ts; +static cycles_t cycles; +static unsigned int persistent_mult, persistent_shift; + /** * omap_read_persistent_clock64 - Return time from a persistent clock. + * @ts: &struct timespec64 for the returned time * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into * nsecs and adds to a monotonically increasing timespec64. */ -static struct timespec64 persistent_ts; -static cycles_t cycles; -static unsigned int persistent_mult, persistent_shift; - static void omap_read_persistent_clock64(struct timespec64 *ts) { unsigned long long nsecs; @@ -206,10 +207,9 @@ static void omap_read_persistent_clock64(struct timespec64 *ts) /** * omap_init_clocksource_32k - setup and register counter 32k as a * kernel clocksource - * @pbase: base addr of counter_32k module - * @size: size of counter_32k to map + * @vbase: base addr of counter_32k module * - * Returns 0 upon success or negative error code upon failure. + * Returns: %0 upon success or negative error code upon failure. * */ static int __init omap_init_clocksource_32k(void __iomem *vbase) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1e17b5f77588..ba71928c0fcb 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2209,7 +2209,7 @@ int omap_hwmod_parse_module_range(struct omap_hwmod *oh, return err; pr_debug("omap_hwmod: %s %pOFn at %pR\n", - oh->name, np, &res); + oh->name, np, res); if (oh && oh->mpu_rt_idx) { omap_hwmod_fix_mpu_rt_idx(oh, np, res); diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 6f85a05ee7e1..dcf6e4846ac9 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -185,7 +185,7 @@ #size-cells = <1>; ranges; - anomix_ns_gpr: syscon@44210000 { + aonmix_ns_gpr: syscon@44210000 { compatible = "fsl,imx93-aonmix-ns-syscfg", "syscon"; reg = <0x44210000 0x1000>; }; @@ -319,6 +319,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&aonmix_ns_gpr 0x14 0>; status = "disabled"; }; @@ -591,6 +592,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&wakeupmix_gpr 0x0c 2>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 36ef2dbe8add..3ee9266fa8e9 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -905,7 +905,7 @@ status = "disabled"; }; - sata_phy: t-phy@1a243000 { + sata_phy: t-phy { compatible = "mediatek,mt7622-tphy", "mediatek,generic-tphy-v1"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 68539ea788df..24eda00e320d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -434,7 +434,7 @@ }; }; - pcie_phy: t-phy@11c00000 { + pcie_phy: t-phy { compatible = "mediatek,mt7986-tphy", "mediatek,generic-tphy-v2"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index b2485ddfd33b..5d635085fe3f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -48,7 +48,7 @@ memory@40000000 { device_type = "memory"; - reg = <0 0x40000000 0 0x80000000>; + reg = <0 0x40000000 0x2 0x00000000>; }; reserved-memory { @@ -56,13 +56,8 @@ #size-cells = <2>; ranges; - /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ - bl31_secmon_reserved: secmon@54600000 { - no-map; - reg = <0 0x54600000 0x0 0x200000>; - }; - - /* 12 MiB reserved for OP-TEE (BL32) + /* + * 12 MiB reserved for OP-TEE (BL32) * +-----------------------+ 0x43e0_0000 * | SHMEM 2MiB | * +-----------------------+ 0x43c0_0000 @@ -75,6 +70,34 @@ no-map; reg = <0 0x43200000 0 0x00c00000>; }; + + scp_mem: memory@50000000 { + compatible = "shared-dma-pool"; + reg = <0 0x50000000 0 0x2900000>; + no-map; + }; + + vpu_mem: memory@53000000 { + compatible = "shared-dma-pool"; + reg = <0 0x53000000 0 0x1400000>; /* 20 MB */ + }; + + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ + bl31_secmon_mem: memory@54600000 { + no-map; + reg = <0 0x54600000 0x0 0x200000>; + }; + + snd_dma_mem: memory@60000000 { + compatible = "shared-dma-pool"; + reg = <0 0x60000000 0 0x1100000>; + no-map; + }; + + apu_mem: memory@62000000 { + compatible = "shared-dma-pool"; + reg = <0 0x62000000 0 0x1400000>; /* 20 MB */ + }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index a9e52b50c8c4..54c674c45b49 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -313,6 +313,7 @@ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH 0>; cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>; + status = "fail"; }; dmic_codec: dmic-codec { diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts index 385b178314db..3067a4091a7a 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts @@ -62,25 +62,23 @@ stdout-path = "serial0:115200n8"; }; - clocks { - divclk4: divclk4 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk4"; + div1_mclk: divclk1 { + compatible = "gpio-gate-clock"; + pinctrl-0 = <&audio_mclk>; + pinctrl-names = "default"; + clocks = <&rpmcc RPM_SMD_DIV_CLK1>; + #clock-cells = <0>; + enable-gpios = <&pm8994_gpios 15 0>; + }; - pinctrl-names = "default"; - pinctrl-0 = <&divclk4_pin_a>; - }; + divclk4: divclk4 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk4"; - div1_mclk: divclk1 { - compatible = "gpio-gate-clock"; - pinctrl-0 = <&audio_mclk>; - pinctrl-names = "default"; - clocks = <&rpmcc RPM_SMD_DIV_CLK1>; - #clock-cells = <0>; - enable-gpios = <&pm8994_gpios 15 0>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk4_pin_a>; }; gpio-keys { diff --git a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi index bcd2397eb373..06f8ff624181 100644 --- a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi @@ -11,26 +11,24 @@ #include <dt-bindings/pinctrl/qcom,pmic-gpio.h> / { - clocks { - divclk1_cdc: divclk1 { - compatible = "gpio-gate-clock"; - clocks = <&rpmcc RPM_SMD_DIV_CLK1>; - #clock-cells = <0>; - enable-gpios = <&pm8994_gpios 15 GPIO_ACTIVE_HIGH>; + divclk1_cdc: divclk1 { + compatible = "gpio-gate-clock"; + clocks = <&rpmcc RPM_SMD_DIV_CLK1>; + #clock-cells = <0>; + enable-gpios = <&pm8994_gpios 15 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&divclk1_default>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk1_default>; + }; - divclk4: divclk4 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk4"; + divclk4: divclk4 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk4"; - pinctrl-names = "default"; - pinctrl-0 = <&divclk4_pin_a>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk4_pin_a>; }; gpio-keys { diff --git a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts index d1066edaea47..f8e9d90afab0 100644 --- a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts +++ b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts @@ -20,16 +20,14 @@ qcom,pmic-id = <0x20009 0x2000a 0x00 0x00>; qcom,board-id = <31 0>; - clocks { - divclk2_haptics: divclk2 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk2"; - - pinctrl-names = "default"; - pinctrl-0 = <&divclk2_pin_a>; - }; + divclk2_haptics: divclk2 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk2"; + + pinctrl-names = "default"; + pinctrl-0 = <&divclk2_pin_a>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi b/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi index 3c3b6287cd27..eaa43f022a65 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi @@ -173,7 +173,7 @@ compatible = "qcom,pmm8654au-gpio", "qcom,spmi-gpio"; reg = <0x8800>; gpio-controller; - gpio-ranges = <&pmm8654au_2_gpios 0 0 12>; + gpio-ranges = <&pmm8654au_1_gpios 0 0 12>; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index a7c3020a5de4..06c53000bb74 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3958,7 +3958,7 @@ pdc: interrupt-controller@b220000 { compatible = "qcom,sm8150-pdc", "qcom,pdc"; - reg = <0 0x0b220000 0 0x400>; + reg = <0 0x0b220000 0 0x30000>; qcom,pdc-ranges = <0 480 94>, <94 609 31>, <125 63 1>; #interrupt-cells = <2>; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index 08a3ad3e7ae9..de0a1f2af983 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -68,15 +68,17 @@ simple-audio-card,format = "i2s"; simple-audio-card,name = "Haikou,I2S-codec"; simple-audio-card,mclk-fs = <512>; + simple-audio-card,frame-master = <&sgtl5000_codec>; + simple-audio-card,bitclock-master = <&sgtl5000_codec>; - simple-audio-card,codec { - clocks = <&sgtl5000_clk>; + sgtl5000_codec: simple-audio-card,codec { sound-dai = <&sgtl5000>; + // Prevent the dai subsystem from overwriting the clock + // frequency. We are using a fixed-frequency oscillator. + system-clock-fixed; }; simple-audio-card,cpu { - bitclock-master; - frame-master; sound-dai = <&i2s0_8ch>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 7dccbe8a9393..f2279aa6ca9e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -492,6 +492,7 @@ &i2s0 { pinctrl-0 = <&i2s0_2ch_bus>; + pinctrl-1 = <&i2s0_2ch_bus_bclk_off>; rockchip,capture-channels = <2>; rockchip,playback-channels = <2>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 9da0b6d77c8d..5bc2d4faeea6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -2457,6 +2457,16 @@ <4 RK_PA0 1 &pcfg_pull_none>; }; + i2s0_2ch_bus_bclk_off: i2s0-2ch-bus-bclk-off { + rockchip,pins = + <3 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>, + <3 RK_PD1 1 &pcfg_pull_none>, + <3 RK_PD2 1 &pcfg_pull_none>, + <3 RK_PD3 1 &pcfg_pull_none>, + <3 RK_PD7 1 &pcfg_pull_none>, + <4 RK_PA0 1 &pcfg_pull_none>; + }; + i2s0_8ch_bus: i2s0-8ch-bus { rockchip,pins = <3 RK_PD0 1 &pcfg_pull_none>, diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5882b2415596..1095c6647e96 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,14 +344,14 @@ */ #define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) #define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGITR_EL2_RES0 GENMASK(63, 57) #define __HFGITR_EL2_MASK GENMASK(54, 0) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6dcdae4d38cb..a1e24228aaaa 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = { .get_input_level = kvm_arch_timer_get_input_level, }; -static bool has_cntpoff(void) -{ - return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); -} - static int nr_timers(struct kvm_vcpu *vcpu) { if (!vcpu_has_nv(vcpu)) @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { if (vcpu_has_nv(vcpu)) { if (is_hyp_ctxt(vcpu)) { @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx) timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); - if (!has_cntpoff()) - cval -= timer_get_offset(ctx); + cval -= timer_get_offset(ctx); timer_set_cval(ctx, cval); @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); - if (!has_cntpoff()) - cval += offset; + cval += offset; write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 9ced1bf0c2b7..ee902ff2a50f 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -977,6 +977,8 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), + SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8c..448b17080d36 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); + if (has_cntpoff()) { + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * We're entrering the guest. Reload the correct + * values from memory now that TGE is clear. + */ + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); + + if (map.direct_ptimer) { + write_sysreg_el0(val, SYS_CNTP_CVAL); + isb(); + } + } + val = read_sysreg(cpacr_el1); val |= CPACR_ELx_TTA; val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | @@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (has_cntpoff()) { + struct timer_map map; + u64 val, offset; + + get_timer_map(vcpu, &map); + + /* + * We're exiting the guest. Save the latest CVAL value + * to memory and apply the offset now that TGE is set. + */ + val = read_sysreg_el0(SYS_CNTP_CVAL); + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + + offset = read_sysreg_s(SYS_CNTPOFF_EL2); + + if (map.direct_ptimer && offset) { + write_sysreg_el0(val + offset, SYS_CNTP_CVAL); + isb(); + } + } + /* * ARM errata 1165522 and 1530923 require the actual execution of the * above before we can switch to the EL2/EL0 translation regime used by diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 0eea225fd09a..a243934c5568 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu) + if (!kvm_arm_support_pmu_v3()) return; pmu->events_host &= ~clr; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e92ec810d449..0afd6136e275 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, - { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, - { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 0dcb36b32cb2..c486c2341b66 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -52,10 +52,9 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * @offset: bus address of the memory * @size: size of the resource to map */ -extern pgprot_t pgprot_wc; - #define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(pgprot_wc)) + ioremap_prot((offset), (size), \ + pgprot_val(wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC)) #define ioremap_cache(offset, size) \ ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index 81b0c4cfbf4f..e2eca1a25b4e 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -33,4 +33,12 @@ .cfi_endproc; \ SYM_END(name, SYM_T_FUNC) +#define SYM_CODE_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + .cfi_startproc; + +#define SYM_CODE_END(name) \ + .cfi_endproc; \ + SYM_END(name, SYM_T_NONE) + #endif diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 35348d4c4209..21319c1e045c 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -105,13 +105,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) return __pgprot(prot); } +extern bool wc_enabled; + #define pgprot_writecombine pgprot_writecombine static inline pgprot_t pgprot_writecombine(pgprot_t _prot) { unsigned long prot = pgprot_val(_prot); - prot = (prot & ~_CACHE_MASK) | _CACHE_WUC; + prot = (prot & ~_CACHE_MASK) | (wc_enabled ? _CACHE_WUC : _CACHE_SUC); return __pgprot(prot); } diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 65518bb8f472..1ec8e4c4cc2b 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -18,7 +18,7 @@ .text .cfi_sections .debug_frame .align 5 -SYM_FUNC_START(handle_syscall) +SYM_CODE_START(handle_syscall) csrrd t0, PERCPU_BASE_KS la.pcrel t1, kernelsp add.d t1, t1, t0 @@ -71,7 +71,7 @@ SYM_FUNC_START(handle_syscall) bl do_syscall RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_syscall) +SYM_CODE_END(handle_syscall) _ASM_NOKPROBE(handle_syscall) SYM_CODE_START(ret_from_fork) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 78f066384657..2bb3aa2dcfcb 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -31,7 +31,7 @@ SYM_FUNC_START(__arch_cpu_idle) 1: jr ra SYM_FUNC_END(__arch_cpu_idle) -SYM_FUNC_START(handle_vint) +SYM_CODE_START(handle_vint) BACKUP_T0T1 SAVE_ALL la_abs t1, __arch_cpu_idle @@ -46,11 +46,11 @@ SYM_FUNC_START(handle_vint) la_abs t0, do_vint jirl ra, t0, 0 RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_vint) +SYM_CODE_END(handle_vint) -SYM_FUNC_START(except_vec_cex) +SYM_CODE_START(except_vec_cex) b cache_parity_error -SYM_FUNC_END(except_vec_cex) +SYM_CODE_END(except_vec_cex) .macro build_prep_badv csrrd t0, LOONGARCH_CSR_BADV @@ -66,7 +66,7 @@ SYM_FUNC_END(except_vec_cex) .macro BUILD_HANDLER exception handler prep .align 5 - SYM_FUNC_START(handle_\exception) + SYM_CODE_START(handle_\exception) 666: BACKUP_T0T1 SAVE_ALL @@ -76,7 +76,7 @@ SYM_FUNC_END(except_vec_cex) jirl ra, t0, 0 668: RESTORE_ALL_AND_RET - SYM_FUNC_END(handle_\exception) + SYM_CODE_END(handle_\exception) SYM_DATA(unwind_hint_\exception, .word 668b - 666b) .endm @@ -93,7 +93,7 @@ SYM_FUNC_END(except_vec_cex) BUILD_HANDLER watch watch none BUILD_HANDLER reserved reserved none /* others */ -SYM_FUNC_START(handle_sys) +SYM_CODE_START(handle_sys) la_abs t0, handle_syscall jr t0 -SYM_FUNC_END(handle_sys) +SYM_CODE_END(handle_sys) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 7783f0a3d742..aed65915e932 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -161,19 +161,19 @@ static void __init smbios_parse(void) } #ifdef CONFIG_ARCH_WRITECOMBINE -pgprot_t pgprot_wc = PAGE_KERNEL_WUC; +bool wc_enabled = true; #else -pgprot_t pgprot_wc = PAGE_KERNEL_SUC; +bool wc_enabled = false; #endif -EXPORT_SYMBOL(pgprot_wc); +EXPORT_SYMBOL(wc_enabled); static int __init setup_writecombine(char *p) { if (!strcmp(p, "on")) - pgprot_wc = PAGE_KERNEL_WUC; + wc_enabled = true; else if (!strcmp(p, "off")) - pgprot_wc = PAGE_KERNEL_SUC; + wc_enabled = false; else pr_warn("Unknown writecombine setting \"%s\".\n", p); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index f3fe8c06ba4d..4dd53427f657 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -43,11 +43,11 @@ void copy_user_highpage(struct page *to, struct page *from, { void *vfrom, *vto; - vto = kmap_atomic(to); - vfrom = kmap_atomic(from); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_page(vto, vfrom); - kunmap_atomic(vfrom); - kunmap_atomic(vto); + kunmap_local(vfrom); + kunmap_local(vto); /* Make sure this page is cleared on other CPU's too before using it */ smp_wmb(); } @@ -240,6 +240,7 @@ pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); pgd_t invalid_pg_dir[_PTRS_PER_PGD] __page_aligned_bss; #ifndef __PAGETABLE_PUD_FOLDED pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pud_table); #endif #ifndef __PAGETABLE_PMD_FOLDED pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index ca17dd3a1915..d5d682f3d29f 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -17,7 +17,7 @@ #define PTRS_PER_PTE_BITS (PAGE_SHIFT - 3) .macro tlb_do_page_fault, write - SYM_FUNC_START(tlb_do_page_fault_\write) + SYM_CODE_START(tlb_do_page_fault_\write) SAVE_ALL csrrd a2, LOONGARCH_CSR_BADV move a0, sp @@ -25,13 +25,13 @@ li.w a1, \write bl do_page_fault RESTORE_ALL_AND_RET - SYM_FUNC_END(tlb_do_page_fault_\write) + SYM_CODE_END(tlb_do_page_fault_\write) .endm tlb_do_page_fault 0 tlb_do_page_fault 1 -SYM_FUNC_START(handle_tlb_protect) +SYM_CODE_START(handle_tlb_protect) BACKUP_T0T1 SAVE_ALL move a0, sp @@ -41,9 +41,9 @@ SYM_FUNC_START(handle_tlb_protect) la_abs t0, do_page_fault jirl ra, t0, 0 RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_tlb_protect) +SYM_CODE_END(handle_tlb_protect) -SYM_FUNC_START(handle_tlb_load) +SYM_CODE_START(handle_tlb_load) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -187,16 +187,16 @@ nopage_tlb_load: csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_0 jr t0 -SYM_FUNC_END(handle_tlb_load) +SYM_CODE_END(handle_tlb_load) -SYM_FUNC_START(handle_tlb_load_ptw) +SYM_CODE_START(handle_tlb_load_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_0 jr t0 -SYM_FUNC_END(handle_tlb_load_ptw) +SYM_CODE_END(handle_tlb_load_ptw) -SYM_FUNC_START(handle_tlb_store) +SYM_CODE_START(handle_tlb_store) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -343,16 +343,16 @@ nopage_tlb_store: csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_store) +SYM_CODE_END(handle_tlb_store) -SYM_FUNC_START(handle_tlb_store_ptw) +SYM_CODE_START(handle_tlb_store_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_store_ptw) +SYM_CODE_END(handle_tlb_store_ptw) -SYM_FUNC_START(handle_tlb_modify) +SYM_CODE_START(handle_tlb_modify) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -497,16 +497,16 @@ nopage_tlb_modify: csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_modify) +SYM_CODE_END(handle_tlb_modify) -SYM_FUNC_START(handle_tlb_modify_ptw) +SYM_CODE_START(handle_tlb_modify_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_modify_ptw) +SYM_CODE_END(handle_tlb_modify_ptw) -SYM_FUNC_START(handle_tlb_refill) +SYM_CODE_START(handle_tlb_refill) csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD lddir t0, t0, 3 @@ -521,4 +521,4 @@ SYM_FUNC_START(handle_tlb_refill) tlbfill csrrd t0, LOONGARCH_CSR_TLBRSAVE ertn -SYM_FUNC_END(handle_tlb_refill) +SYM_CODE_END(handle_tlb_refill) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 7b2ac1319d70..467ee6b95ae1 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, gfn_t gfn = gpa >> PAGE_SHIFT; int srcu_idx, err; kvm_pfn_t pfn; - pte_t *ptep, entry, old_pte; + pte_t *ptep, entry; bool writeable; unsigned long prot_bits; unsigned long mmu_seq; @@ -664,7 +664,6 @@ retry: entry = pfn_pte(pfn, __pgprot(prot_bits)); /* Write the PTE */ - old_pte = *ptep; set_pte(ptep, entry); err = 0; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3aaadfd2c8eb..d5d5388973ac 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -910,7 +910,7 @@ config ARCH_FORCE_MAX_ORDER default "6" if PPC32 && PPC_64K_PAGES range 4 10 if PPC32 && PPC_256K_PAGES default "4" if PPC32 && PPC_256K_PAGES - range 10 10 + range 10 12 default "10" help The kernel page allocator limits the size of maximal physically diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 21f681ee535a..e6fe1d5731f2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte) #define pte_wrprotect pte_wrprotect +static inline int pte_read(pte_t pte) +{ + return (pte_val(pte) & _PAGE_RO) != _PAGE_NA; +} + +#define pte_read pte_read + static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RO); diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 5cd9acf58a7d..eb6891e34cbd 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if (pte_young(*ptep)) + if (!pte_young(*ptep)) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 56ea48276356..c721478c5934 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_RW; } #endif +#ifndef pte_read static inline int pte_read(pte_t pte) { return 1; } +#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9692acb0361f..7eda33a24bb4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -137,8 +137,9 @@ ret_from_syscall: lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -172,10 +173,11 @@ syscall_exit_finish: b 1b #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 97e9ea0c7297..0f1641a31250 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -395,7 +395,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2f1026fba00d..20f72cd1d813 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -948,6 +948,8 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); + /* Set max_mapnr before paging_init() */ + set_max_mapnr(max_pfn); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 253620979d0c..6dd2f46bd3ef 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -406,6 +406,9 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto yield_prev; /* owner vcpu is running */ + if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu) + goto yield_prev; /* re-sample lock owner */ + spin_end(); preempted = true; diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 39acc2cbab4c..9e1f6558d026 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1212,14 +1212,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) smp_mb(); /* see radix__flush_tlb_mm */ exit_flush_lazy_tlbs(mm); - _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL); - - /* - * It should not be possible to have coprocessors still - * attached here. - */ - if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0)) - __flush_all_mm(mm, true); + __flush_all_mm(mm, true); preempt_enable(); } else { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8b121df7b08f..07e8f4f1e07f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -288,7 +288,6 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_pfn); kasan_late_init(); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3ba9fe411604..4d69bfb9bc11 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -104,6 +104,8 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } /* Embedded type MMU with HW exec support. This is a bit more complicated * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. + * + * This is also called once for the folio. So only work with folio->flags here. */ static inline pte_t set_pte_filter(pte_t pte) { @@ -190,29 +192,39 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { - /* - * Make sure hardware valid bit is not set. We don't do - * tlb flush for this update. - */ - VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); /* Note: mm->context.id might not yet have been assigned as * this context might not have been activated yet when this - * is called. + * is called. Filter the pte value and use the filtered value + * to setup all the ptes in the range. */ pte = set_pte_filter(pte); - /* Perform the setting of the PTE */ - arch_enter_lazy_mmu_mode(); + /* + * We don't need to call arch_enter/leave_lazy_mmu_mode() + * because we expect set_ptes to be only be used on not present + * and not hw_valid ptes. Hence there is no translation cache flush + * involved that need to be batched. + */ for (;;) { + + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); + + /* Perform the setting of the PTE */ __set_pte_at(mm, addr, ptep, pte, 0); if (--nr == 0) break; ptep++; - pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT)); addr += PAGE_SIZE; + /* + * increment the pfn. + */ + pte = pfn_pte(pte_pfn(pte) + 1, pte_pgprot((pte))); } - arch_leave_lazy_mmu_mode(); } void unmap_kernel_page(unsigned long va) diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 1a587618015c..18daafbe2e65 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -66,7 +66,7 @@ static int match_context(const void *v, struct file *file, unsigned fd) */ static struct spu_context *coredump_next_context(int *fd) { - struct spu_context *ctx; + struct spu_context *ctx = NULL; struct file *file; int n = iterate_fd(current->files, *fd, match_context, NULL); if (!n) @@ -74,10 +74,13 @@ static struct spu_context *coredump_next_context(int *fd) *fd = n - 1; rcu_read_lock(); - file = lookup_fd_rcu(*fd); - ctx = SPUFS_I(file_inode(file))->i_ctx; - get_spu_context(ctx); + file = lookup_fdget_rcu(*fd); rcu_read_unlock(); + if (file) { + ctx = SPUFS_I(file_inode(file))->i_ctx; + get_spu_context(ctx); + fput(file); + } return ctx; } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 38c5be34c895..10c1320adfd0 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); out: return inode; } diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index bae45b358a09..2b0cac6fb61f 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,9 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 @@ -196,7 +193,7 @@ plpar_hcall_trace: HVSC - ld r12,STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) @@ -296,9 +293,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d607ab0f7c6d..9c48fecc6719 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -273,11 +273,9 @@ config RISCV_DMA_NONCOHERENT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select DMA_BOUNCE_UNALIGNED_KMALLOC if SWIOTLB - select DMA_DIRECT_REMAP if MMU config RISCV_NONSTANDARD_CACHE_OPS bool - depends on RISCV_DMA_NONCOHERENT help This enables function pointer support for non-standard noncoherent systems to handle cache management. @@ -550,6 +548,7 @@ config RISCV_ISA_ZICBOM depends on RISCV_ALTERNATIVE default y select RISCV_DMA_NONCOHERENT + select DMA_DIRECT_REMAP help Adds support to dynamically detect the presence of the ZICBOM extension (Cache Block Management Operations) and enable its diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 566bcefeab50..e2c731cfed8c 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -77,6 +77,7 @@ config ERRATA_THEAD_PBMT config ERRATA_THEAD_CMO bool "Apply T-Head cache management errata" depends on ERRATA_THEAD && MMU + select DMA_DIRECT_REMAP select RISCV_DMA_NONCOHERENT default y help diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1329e060c548..b43a6bb7e4dc 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -6,7 +6,6 @@ # for more details. # -OBJCOPYFLAGS := -O binary LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi index 12ebe9792356..2c02358abd71 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -431,7 +431,7 @@ }; ss-pins { - pinmux = <GPIOMUX(48, GPOUT_SYS_SPI0_FSS, + pinmux = <GPIOMUX(49, GPOUT_SYS_SPI0_FSS, GPOEN_ENABLE, GPI_SYS_SPI0_FSS)>; bias-disable; diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index ce708183b6f6..ff364709a6df 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -139,6 +139,7 @@ interrupt-parent = <&plic>; #address-cells = <2>; #size-cells = <2>; + dma-noncoherent; ranges; plic: interrupt-controller@ffd8000000 { diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile index 2d644e19caef..6278c389b801 100644 --- a/arch/riscv/errata/andes/Makefile +++ b/arch/riscv/errata/andes/Makefile @@ -1 +1,5 @@ +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_errata.o := -mcmodel=medany +endif + obj-y += errata.o diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 740a979171e5..2b2f5df7ef2c 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -31,6 +31,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* + * Let's do like x86/arm64 and ignore the compat syscalls. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __riscv_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __riscv_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} + struct dyn_arch_ftrace { }; #endif diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index e7882ccb0fd4..78ea44f76718 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,6 +40,15 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool kprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h index f2183e00fdd2..3fc7deda9190 100644 --- a/arch/riscv/include/asm/uprobes.h +++ b/arch/riscv/include/asm/uprobes.h @@ -34,7 +34,18 @@ struct arch_uprobe { bool simulate; }; +#ifdef CONFIG_UPROBES bool uprobe_breakpoint_handler(struct pt_regs *regs); bool uprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool uprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ #endif /* _ASM_RISCV_UPROBES_H */ diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index a8efa053c4a5..9cc0a7669271 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -60,7 +60,7 @@ static void init_irq_stacks(void) } #endif /* CONFIG_VMAP_STACK */ -#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK void do_softirq_own_stack(void) { #ifdef CONFIG_IRQ_STACKS @@ -92,7 +92,7 @@ void do_softirq_own_stack(void) #endif __do_softirq(); } -#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */ +#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else static void init_irq_stacks(void) {} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e600aab116a4..aac853ae4eb7 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -173,19 +173,6 @@ static void __init init_resources(void) if (ret < 0) goto error; -#ifdef CONFIG_KEXEC_CORE - if (crashk_res.start != crashk_res.end) { - ret = add_resource(&iomem_resource, &crashk_res); - if (ret < 0) - goto error; - } - if (crashk_low_res.start != crashk_low_res.end) { - ret = add_resource(&iomem_resource, &crashk_low_res); - if (ret < 0) - goto error; - } -#endif - #ifdef CONFIG_CRASH_DUMP if (elfcorehdr_size > 0) { elfcorehdr_res.start = elfcorehdr_addr; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 180d951d3624..21a4d0e111bc 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -311,13 +311,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig, /* Align the stack frame. */ sp &= ~0xfUL; - /* - * Fail if the size of the altstack is not large enough for the - * sigframe construction. - */ - if (current->sas_ss_size && sp < current->sas_ss_sp) - return (void __user __force *)-1UL; - return (void __user *)sp; } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 19807c4d3805..fae8f610d867 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -13,6 +13,8 @@ #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/kprobes.h> +#include <linux/uprobes.h> +#include <asm/uprobes.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/irq.h> @@ -247,22 +249,28 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } +static bool probe_single_step_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_single_step_handler(regs) : kprobe_single_step_handler(regs); +} + +static bool probe_breakpoint_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_breakpoint_handler(regs) : kprobe_breakpoint_handler(regs); +} + void handle_break(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs)) + if (probe_single_step_handler(regs)) return; - if (kprobe_breakpoint_handler(regs)) - return; -#endif -#ifdef CONFIG_UPROBES - if (uprobe_single_step_handler(regs)) + if (probe_breakpoint_handler(regs)) return; - if (uprobe_breakpoint_handler(regs)) - return; -#endif current->thread.bad_cause = regs->cause; if (user_mode(regs)) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 6115d7514972..90d4ba36d1d0 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -72,7 +72,7 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f } pagefault_out_of_memory(); return; - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { no_context(regs, addr); diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index e4a2ace92dbe..b52f0210481f 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -183,15 +183,22 @@ void set_huge_pte_at(struct mm_struct *mm, pte_t pte, unsigned long sz) { + unsigned long hugepage_shift; int i, pte_num; - if (!pte_napot(pte)) { - set_pte_at(mm, addr, ptep, pte); - return; - } + if (sz >= PGDIR_SIZE) + hugepage_shift = PGDIR_SHIFT; + else if (sz >= P4D_SIZE) + hugepage_shift = P4D_SHIFT; + else if (sz >= PUD_SIZE) + hugepage_shift = PUD_SHIFT; + else if (sz >= PMD_SIZE) + hugepage_shift = PMD_SHIFT; + else + hugepage_shift = PAGE_SHIFT; - pte_num = napot_pte_num(napot_cont_order(pte)); - for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + pte_num = sz >> hugepage_shift; + for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift)) set_pte_at(mm, addr, ptep, pte); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index ecd3ae6f4116..8581693e62d3 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -245,7 +245,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) - emit_mv(RV_REG_A0, RV_REG_A5, ctx); + emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); @@ -759,8 +759,10 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; - if (save_ret) - emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx); + if (save_ret) { + emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); + } /* update branch with beqz */ if (ctx->insns) { @@ -853,7 +855,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { - stack_size += 8; + stack_size += 16; /* Save both A5 (BPF R0) and A0 */ retval_off = stack_size; } @@ -957,6 +959,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (ret) goto out; emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); im->ip_after_call = ctx->insns + ctx->ninsns; /* 2 nops reserved for auipc+jalr pair */ emit(rv_nop(), ctx); @@ -988,8 +991,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_args(nregs, args_off, ctx); - if (save_ret) + if (save_ret) { emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); + emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx); + } emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); @@ -1515,7 +1520,8 @@ out_be: if (ret) return ret; - emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); + if (insn->src_reg != BPF_PSEUDO_CALL) + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); break; } /* tail call */ diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 01257ce3b89c..442a74f113cb 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -57,6 +57,7 @@ static void kasan_populate_shadow(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; unsigned long untracked_end; unsigned long start, end; int i; @@ -101,8 +102,12 @@ static void kasan_populate_shadow(void) * +- shadow end ----+---------+- shadow end ---+ */ - for_each_physmem_usable_range(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) { kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) + kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + memgap_start = end; + } if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index ada83149932f..858beaf4a8cb 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb) struct inode *inode = d_inode(sb_info->update_file); sb_info->last_update = ktime_get_seconds(); - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); } /* directory tree removal functions */ @@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret); + simple_inode_init_ts(ret); if (S_ISDIR(mode)) set_nlink(ret, 2); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c1b47d608a2b..efaebba5ee19 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) return 0; } -static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) -{ - return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa); -} - static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); @@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) if (!gi->origin) return; - if (gi->alert.mask) - KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", - kvm, gi->alert.mask); - while (gisa_in_alert_list(gi->origin)) - cpu_relax(); + WARN(gi->alert.mask != 0x00, + "unexpected non zero alert.mask 0x%02x", + gi->alert.mask); + gi->alert.mask = 0x00; + if (gisa_set_iam(gi->origin, gi->alert.mask)) + process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2861e3360aff..e507692e51e7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2066,6 +2066,7 @@ struct bpf_tramp_jit { * func_addr's original caller */ int stack_size; /* Trampoline stack size */ + int backchain_off; /* Offset of backchain */ int stack_args_off; /* Offset of stack arguments for calling * func_addr, has to be at the top */ @@ -2086,9 +2087,10 @@ struct bpf_tramp_jit { * for __bpf_prog_enter() return value and * func_addr respectively */ - int r14_off; /* Offset of saved %r14 */ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */ int tccnt_off; /* Offset of saved tailcall counter */ + int r14_off; /* Offset of saved %r14, has to be at the + * bottom */ int do_fexit; /* do_fexit: label */ }; @@ -2247,8 +2249,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, * Calculate the stack layout. */ - /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */ + /* + * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x + * ABI requires, put our backchain at the end of the allocated memory. + */ tjit->stack_size = STACK_FRAME_OVERHEAD; + tjit->backchain_off = tjit->stack_size - sizeof(u64); tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64)); tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64)); tjit->ip_off = alloc_stack(tjit, sizeof(u64)); @@ -2256,16 +2262,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64)); tjit->retval_off = alloc_stack(tjit, sizeof(u64)); tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64)); - tjit->r14_off = alloc_stack(tjit, sizeof(u64)); tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); - /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */ - tjit->stack_size -= STACK_FRAME_OVERHEAD; + tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2); + /* + * In accordance with the s390x ABI, the caller has allocated + * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's + * backchain, and the rest we can use. + */ + tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64); tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; + /* lgr %r1,%r15 */ + EMIT4(0xb9040000, REG_1, REG_15); /* aghi %r15,-stack_size */ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); + /* stg %r1,backchain_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, + tjit->backchain_off); /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ _EMIT6(0xd203f000 | tjit->tccnt_off, 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2d9b01d7ca4c..99209085c75b 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -564,6 +564,17 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, s->dma_length = 0; } } + +static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) +{ + size_t n = BITS_TO_LONGS(bits); + size_t bytes; + + if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) + return NULL; + + return vzalloc(bytes); +} int zpci_dma_init_device(struct zpci_dev *zdev) { @@ -604,13 +615,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->end_dma - zdev->start_dma + 1); zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); + zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto free_dma_table; } if (!s390_iommu_strict) { - zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); + zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); if (!zdev->lazy_bitmap) { rc = -ENOMEM; goto free_bitmap; diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 84ad709cbecb..66eda40fce36 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -453,5 +453,5 @@ ccslow: cmp %g1, 0 * we only bother with faults on loads... */ cc_fault: - ret + retl clr %o0 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1379603016fd..9b2a598cce8d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1534,6 +1534,7 @@ config NUMA depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) default y if X86_BIGSMP select USE_PERCPU_NUMA_NODE_ID + select OF_NUMA if OF help Enable NUMA (Non-Uniform Memory Access) support. diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index dc8c876fbd8f..80d76aea1f7b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -103,6 +103,16 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + +static bool fault_in_kernel_space(unsigned long address) +{ + return false; +} + #undef __init #define __init diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 6a1821bd7d5e..83c0afb7c741 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -42,7 +42,8 @@ vdso_img-$(VDSO64-y) += 64 vdso_img-$(VDSOX32-y) += x32 vdso_img-$(VDSO32-y) += 32 -obj-$(VDSO32-y) += vdso32-setup.o +obj-$(VDSO32-y) += vdso32-setup.o +OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F) diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index 76b1f8bb0fd5..dab4ed199227 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/insn.h> +#include <linux/mm.h> #include "perf_event.h" @@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort, * The LBR logs any address in the IP, even if the IP just * faulted. This means userspace can control the from address. * Ensure we don't blindly read any address by validating it is - * a known text address. + * a known text address and not a vsyscall address. */ - if (kernel_text_address(from)) { + if (kernel_text_address(from) && !in_gate_area_no_mm(from)) { addr = (void *)from; /* * Assume we can get the maximum possible size diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 783ed339f341..21556ad87f4b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,6 +7,8 @@ * Author : K. Y. Srinivasan <kys@microsoft.com> */ +#define pr_fmt(fmt) "Hyper-V: " fmt + #include <linux/efi.h> #include <linux/types.h> #include <linux/bitfield.h> @@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; if (!hv_reenlightenment_available()) { - pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + pr_warn("reenlightenment support is unavailable\n"); return; } @@ -394,6 +396,7 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) static u8 __init get_vtl(void) { u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; @@ -416,13 +419,16 @@ static u8 __init get_vtl(void) if (hv_result_success(ret)) { ret = output->as64.low & HV_X64_VTL_MASK; } else { - pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); - ret = 0; + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); } local_irq_restore(flags); return ret; } +#else +static inline u8 get_vtl(void) { return 0; } +#endif /* * This function is to be invoked early in the boot sequence after the @@ -564,7 +570,7 @@ skip_hypercall_pg_init: if (cpu_feature_enabled(X86_FEATURE_IBT) && *(u32 *)hv_hypercall_pg != gen_endbr()) { setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + pr_warn("Disabling IBT because of Hyper-V bug\n"); } #endif @@ -604,8 +610,10 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) - ms_hyperv.vtl = get_vtl(); + ms_hyperv.vtl = get_vtl(); + + if (ms_hyperv.vtl > 0) /* non default VTL */ + hv_vtl_early_init(); return; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 36a562218010..999f5ac82fe9 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) return hv_vtl_bringup_vcpu(vp_id, start_eip); } -static int __init hv_vtl_early_init(void) +int __init hv_vtl_early_init(void) { /* * `boot_cpu_has` returns the runtime feature support, @@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void) return 0; } -early_initcall(hv_vtl_early_init); diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 31089b851c4f..a2be3aefff9f 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { static inline void fpu_sync_guest_vmexit_xfd_state(void) { } #endif -extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); static inline void fpstate_set_confidential(struct fpu_guest *gfpu) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 637fa1df3512..c715097e92fd 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -69,6 +69,8 @@ struct legacy_pic { void (*make_irq)(unsigned int irq); }; +void legacy_pic_pcat_compat(void); + extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5fcd85fd64fd..197316121f04 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -27,6 +27,7 @@ * _X - regular server parts * _D - micro server parts * _N,_P - other mobile parts + * _H - premium mobile parts * _S - other client parts * * Historical OPTDIFFs: @@ -124,6 +125,7 @@ #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_FAM6_ARROWLAKE_H 0xC5 #define INTEL_FAM6_ARROWLAKE 0xC6 #define INTEL_FAM6_LUNARLAKE_M 0xBD diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17715cb8731d..70d139406bc8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -528,7 +528,6 @@ struct kvm_pmu { u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; - struct irq_work irq_work; /* * Overlay the bitmap with a 64-bit atomic so that all bits can be diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 180b1cbfcc4e..6de6e1d95952 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -245,7 +245,7 @@ static inline void cmci_recheck(void) {} int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); -int mce_usable_address(struct mce *m); +bool mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 033b53f993c6..896445edc6a8 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } #ifdef CONFIG_HYPERV_VTL_MODE void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); #else static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } #endif #include <asm-generic/mshyperv.h> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1d111350197f..61705dd0e94a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -637,12 +637,17 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e -/* Fam 17h MSRs */ -#define MSR_F17H_IRPERF 0xc00000e9 +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +/* Zen 2 */ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 @@ -1112,12 +1117,16 @@ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F -/* AMD-V MSRs */ +/* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + /* Hardware Feedback Interface */ #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c55cc243592e..14cd3cd5f85a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -271,7 +271,7 @@ .Lskip_rsb_\@: .endm -#ifdef CONFIG_CPU_UNRET_ENTRY +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) #define CALL_UNTRAIN_RET "call entry_untrain_ret" #else #define CALL_UNTRAIN_RET "" @@ -288,38 +288,24 @@ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. */ -.macro UNTRAIN_RET -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) +.macro __UNTRAIN_RET ibpb_feature, call_depth_insns +#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ - __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH + "call entry_ibpb", \ibpb_feature, \ + __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm -.macro UNTRAIN_RET_VM -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) - VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \ - __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH -#endif -.endm +#define UNTRAIN_RET \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) -.macro UNTRAIN_RET_FROM_CALL -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_CALL_DEPTH_TRACKING) - VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ - __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH -#endif -.endm +#define UNTRAIN_RET_VM \ + __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) + +#define UNTRAIN_RET_FROM_CALL \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) .macro CALL_DEPTH_ACCOUNT @@ -348,13 +334,23 @@ extern void __x86_return_thunk(void); static inline void __x86_return_thunk(void) {} #endif +#ifdef CONFIG_CPU_UNRET_ENTRY extern void retbleed_return_thunk(void); +#else +static inline void retbleed_return_thunk(void) {} +#endif + +#ifdef CONFIG_CPU_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); +#else +static inline void srso_return_thunk(void) {} +static inline void srso_alias_return_thunk(void) {} +#endif -extern void retbleed_untrain_ret(void); -extern void srso_untrain_ret(void); -extern void srso_alias_untrain_ret(void); +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); @@ -362,12 +358,7 @@ extern void entry_ibpb(void); extern void (*x86_return_thunk)(void); #ifdef CONFIG_CALL_DEPTH_TRACKING -extern void __x86_return_skl(void); - -static inline void x86_set_skl_return_thunk(void) -{ - x86_return_thunk = &__x86_return_skl; -} +extern void call_depth_return_thunk(void); #define CALL_DEPTH_ACCOUNT \ ALTERNATIVE("", \ @@ -380,12 +371,12 @@ DECLARE_PER_CPU(u64, __x86_ret_count); DECLARE_PER_CPU(u64, __x86_stuffs_count); DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif -#else -static inline void x86_set_skl_return_thunk(void) {} +#else /* !CONFIG_CALL_DEPTH_TRACKING */ +static inline void call_depth_return_thunk(void) {} #define CALL_DEPTH_ACCOUNT "" -#endif +#endif /* CONFIG_CALL_DEPTH_TRACKING */ #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b716d291d0d4..65dee2420624 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -31,6 +31,11 @@ static inline void x86_dtb_init(void) { } #define of_ioapic 0 #endif +#ifdef CONFIG_OF_EARLY_FLATTREE +void x86_flattree_get_config(void); +#else +static inline void x86_flattree_get_config(void) { } +#endif extern char cmd_line[COMMAND_LINE_SIZE]; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ad98dd1d9cfb..c31c633419fe 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -bool smp_park_other_cpus_in_init(void); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 19bf955b67e0..87a7b917d30e 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -229,10 +229,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) -#define SVM_VM_CR_VALID_MASK 0x001fULL -#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL -#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL - #define SVM_NESTED_CTL_NP_ENABLE BIT(0) #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) @@ -268,6 +264,7 @@ enum avic_ipi_failure_cause { AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, AVIC_IPI_FAILURE_INVALID_TARGET, AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, }; #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) @@ -571,8 +568,6 @@ struct vmcb { #define SVM_CPUID_FUNC 0x8000000a -#define SVM_VM_CR_SVM_DISABLE 4 - #define SVM_SELECTOR_S_SHIFT 4 #define SVM_SELECTOR_DPL_SHIFT 5 #define SVM_SELECTOR_P_SHIFT 7 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8bae40a66282..5c367c1290c3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -496,7 +496,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned len); #define copy_mc_to_kernel copy_mc_to_kernel unsigned long __must_check -copy_mc_to_user(void *to, const void *from, unsigned len); +copy_mc_to_user(void __user *to, const void *from, unsigned len); #endif /* diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2a0ea38955df..c55c0ef47a18 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -148,6 +148,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) pr_debug("Local APIC address 0x%08x\n", madt->address); } + if (madt->flags & ACPI_MADT_PCAT_COMPAT) + legacy_pic_pcat_compat(); + /* ACPI 6.3 and newer support the online capable bit. */ if (acpi_gbl_FADT.header.revision > 6 || (acpi_gbl_FADT.header.revision == 6 && diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 517ee01503be..73be3931e4f0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 insn_buff[MAX_PATCH_LEN]; DPRINTK(ALT, "alt table %px, -> %px", start, end); + + /* + * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using + * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. + * During the process, KASAN becomes confused seeing partial LA57 + * conversion and triggers a false-positive out-of-bound report. + * + * Disable KASAN until the patching is complete. + */ + kasan_disable_current(); + /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, text_poke_early(instr, insn_buff, insn_buff_sz); } + + kasan_enable_current(); } static inline bool is_jcc32(struct insn *insn) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 356de955e78d..43238ac2e845 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -27,6 +27,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT 0x153a #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 #define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb +#define PCI_DEVICE_ID_AMD_MI300_ROOT 0x14f8 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec @@ -43,6 +44,7 @@ #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 +#define PCI_DEVICE_ID_AMD_MI300_DF_F4 0x152c /* Protect the PCI config register pairs used for SMN. */ static DEFINE_MUTEX(smn_mutex); @@ -62,6 +64,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_ROOT) }, {} }; @@ -93,6 +96,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F3) }, {} }; @@ -112,9 +116,13 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) }, {} }; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index faa9f2299848..e9ad518a5003 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -48,11 +48,6 @@ EXPORT_SYMBOL_GPL(__x86_call_count); extern s32 __call_sites[], __call_sites_end[]; -struct thunk_desc { - void *template; - unsigned int template_size; -}; - struct core_text { unsigned long base; unsigned long end; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 03ef962a6992..d97d473545a6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -80,6 +80,10 @@ static const int amd_div0[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1010,7 +1014,6 @@ static bool cpu_has_zenbleed_microcode(void) default: return false; - break; } if (boot_cpu_data.microcode < good_rev) @@ -1040,6 +1043,8 @@ static void zenbleed_check(struct cpuinfo_x86 *c) static void init_amd(struct cpuinfo_x86 *c) { + u64 vm_cr; + early_init_amd(c); /* @@ -1091,6 +1096,14 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_cacheinfo(c); + if (cpu_has(c, X86_FEATURE_SVM)) { + rdmsrl(MSR_VM_CR, vm_cr); + if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) { + pr_notice_once("SVM disabled (by BIOS) in MSR_VM_CR\n"); + clear_cpu_cap(c, X86_FEATURE_SVM); + } + } + if (!cpu_has(c, X86_FEATURE_LFENCE_RDTSC) && cpu_has(c, X86_FEATURE_XMM2)) { /* * Use LFENCE for execution serialization. On families which @@ -1149,6 +1162,10 @@ static void init_amd(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); } + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 10499bcd4e39..bb0ab8466b91 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; +void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) @@ -717,7 +717,7 @@ void update_gds_msr(void) case GDS_MITIGATION_UCODE_NEEDED: case GDS_MITIGATION_HYPERVISOR: return; - }; + } wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); @@ -1019,7 +1019,6 @@ static void __init retbleed_select_mitigation(void) do_cmd_auto: case RETBLEED_CMD_AUTO: - default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) @@ -1042,8 +1041,7 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - if (IS_ENABLED(CONFIG_RETHUNK)) - x86_return_thunk = retbleed_return_thunk; + x86_return_thunk = retbleed_return_thunk; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -1061,7 +1059,8 @@ do_cmd_auto: case RETBLEED_MITIGATION_STUFF: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); - x86_set_skl_return_thunk(); + + x86_return_thunk = call_depth_return_thunk; break; default: @@ -1290,6 +1289,8 @@ spectre_v2_user_select_mitigation(void) spectre_v2_user_ibpb = mode; switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + break; case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: @@ -1301,8 +1302,6 @@ spectre_v2_user_select_mitigation(void) case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; - default: - break; } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", @@ -2160,6 +2159,10 @@ static int l1d_flush_prctl_get(struct task_struct *task) static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { + case SPEC_STORE_BYPASS_NONE: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_SECCOMP: @@ -2171,11 +2174,8 @@ static int ssb_prctl_get(struct task_struct *task) if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - default: - if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) - return PR_SPEC_ENABLE; - return PR_SPEC_NOT_AFFECTED; } + BUG(); } static int ib_prctl_get(struct task_struct *task) @@ -2353,6 +2353,8 @@ early_param("l1tf", l1tf_cmdline); enum srso_mitigation { SRSO_MITIGATION_NONE, + SRSO_MITIGATION_UCODE_NEEDED, + SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED, SRSO_MITIGATION_MICROCODE, SRSO_MITIGATION_SAFE_RET, SRSO_MITIGATION_IBPB, @@ -2368,11 +2370,13 @@ enum srso_mitigation_cmd { }; static const char * const srso_strings[] = { - [SRSO_MITIGATION_NONE] = "Vulnerable", - [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", - [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", - [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", - [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" + [SRSO_MITIGATION_NONE] = "Vulnerable", + [SRSO_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED] = "Vulnerable: Safe RET, no microcode", + [SRSO_MITIGATION_MICROCODE] = "Vulnerable: Microcode, no safe RET", + [SRSO_MITIGATION_SAFE_RET] = "Mitigation: Safe RET", + [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", + [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" }; static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; @@ -2406,34 +2410,44 @@ static void __init srso_select_mitigation(void) { bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE); - if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) - goto pred_cmd; + if (cpu_mitigations_off()) + return; - if (!has_microcode) { - pr_warn("IBPB-extending microcode not applied!\n"); - pr_warn(SRSO_NOTICE); - } else { + if (!boot_cpu_has_bug(X86_BUG_SRSO)) { + if (boot_cpu_has(X86_FEATURE_SBPB)) + x86_pred_cmd = PRED_CMD_SBPB; + return; + } + + if (has_microcode) { /* * Zen1/2 with SMT off aren't vulnerable after the right * IBPB microcode has been applied. + * + * Zen1/2 don't have SBPB, no need to try to enable it here. */ if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { setup_force_cpu_cap(X86_FEATURE_SRSO_NO); return; } - } - if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (has_microcode) { - pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n"); + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { srso_mitigation = SRSO_MITIGATION_IBPB; - goto pred_cmd; + goto out; } + } else { + pr_warn("IBPB-extending microcode not applied!\n"); + pr_warn(SRSO_NOTICE); + + /* may be overwritten by SRSO_CMD_SAFE_RET below */ + srso_mitigation = SRSO_MITIGATION_UCODE_NEEDED; } switch (srso_cmd) { case SRSO_CMD_OFF: - goto pred_cmd; + if (boot_cpu_has(X86_FEATURE_SBPB)) + x86_pred_cmd = PRED_CMD_SBPB; + return; case SRSO_CMD_MICROCODE: if (has_microcode) { @@ -2458,10 +2472,12 @@ static void __init srso_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_SRSO); x86_return_thunk = srso_return_thunk; } - srso_mitigation = SRSO_MITIGATION_SAFE_RET; + if (has_microcode) + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + else + srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; } else { pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); - goto pred_cmd; } break; @@ -2473,7 +2489,6 @@ static void __init srso_select_mitigation(void) } } else { pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); - goto pred_cmd; } break; @@ -2485,20 +2500,12 @@ static void __init srso_select_mitigation(void) } } else { pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); - goto pred_cmd; } break; - - default: - break; } - pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); - -pred_cmd: - if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) && - boot_cpu_has(X86_FEATURE_SBPB)) - x86_pred_cmd = PRED_CMD_SBPB; +out: + pr_info("%s\n", srso_strings[srso_mitigation]); } #undef pr_fmt @@ -2704,9 +2711,7 @@ static ssize_t srso_show_state(char *buf) if (boot_cpu_has(X86_FEATURE_SRSO_NO)) return sysfs_emit(buf, "Mitigation: SMT disabled\n"); - return sysfs_emit(buf, "%s%s\n", - srso_strings[srso_mitigation], - boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode"); + return sysfs_emit(buf, "%s\n", srso_strings[srso_mitigation]); } static ssize_t gds_show_state(char *buf) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index defdc594be14..16f34639ecf7 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -290,6 +290,8 @@ static void early_init_hygon(struct cpuinfo_x86 *c) static void init_hygon(struct cpuinfo_x86 *c) { + u64 vm_cr; + early_init_hygon(c); /* @@ -320,6 +322,14 @@ static void init_hygon(struct cpuinfo_x86 *c) init_hygon_cacheinfo(c); + if (cpu_has(c, X86_FEATURE_SVM)) { + rdmsrl(MSR_VM_CR, vm_cr); + if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) { + pr_notice_once("SVM disabled (by BIOS) in MSR_VM_CR\n"); + clear_cpu_cap(c, X86_FEATURE_SVM); + } + } + if (cpu_has(c, X86_FEATURE_XMM2)) { /* * Use LFENCE for execution serialization. On families which diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index c267f43de39e..f3517b8a8e91 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -713,17 +713,75 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } -bool amd_mce_is_memory_error(struct mce *m) +/* + * DRAM ECC errors are reported in the Northbridge (bank 4) with + * Extended Error Code 8. + */ +static bool legacy_mce_is_memory_error(struct mce *m) +{ + return m->bank == 4 && XEC(m->status, 0x1f) == 8; +} + +/* + * DRAM ECC errors are reported in Unified Memory Controllers with + * Extended Error Code 0. + */ +static bool smca_mce_is_memory_error(struct mce *m) { enum smca_bank_types bank_type; - /* ErrCodeExt[20:16] */ - u8 xec = (m->status >> 16) & 0x1f; + + if (XEC(m->status, 0x3f)) + return false; bank_type = smca_get_bank_type(m->extcpu, m->bank); + + return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2; +} + +bool amd_mce_is_memory_error(struct mce *m) +{ if (mce_flags.smca) - return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0; + return smca_mce_is_memory_error(m); + else + return legacy_mce_is_memory_error(m); +} + +/* + * AMD systems do not have an explicit indicator that the value in MCA_ADDR is + * a system physical address. Therefore, individual cases need to be detected. + * Future cases and checks will be added as needed. + * + * 1) General case + * a) Assume address is not usable. + * 2) Poison errors + * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy + * northbridge (bank 4). + * b) Refers to poison consumption in the core. Does not include "no action", + * "action optional", or "deferred" error severities. + * c) Will include a usable address so that immediate action can be taken. + * 3) Northbridge DRAM ECC errors + * a) Reported in legacy bank 4 with extended error code (XEC) 8. + * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, + * this bit should not be checked. + * + * NOTE: SMCA UMC memory errors fall into case #1. + */ +bool amd_mce_usable_address(struct mce *m) +{ + /* Check special northbridge case 3) first. */ + if (!mce_flags.smca) { + if (legacy_mce_is_memory_error(m)) + return true; + else if (m->bank == 4) + return false; + } - return m->bank == 4 && xec == 0x8; + /* Check poison bit for all other bank types. */ + if (m->status & MCI_STATUS_POISON) + return true; + + /* Assume address is not usable for all others. */ + return false; } static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6f35f724cc14..0214d4232346 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -453,32 +453,22 @@ static void mce_irq_work_cb(struct irq_work *entry) mce_schedule_work(); } -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses up to page granularity for now. - */ -int mce_usable_address(struct mce *m) +bool mce_usable_address(struct mce *m) { if (!(m->status & MCI_STATUS_ADDRV)) - return 0; - - /* Checks after this one are Intel/Zhaoxin-specific: */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && - boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) - return 1; - - if (!(m->status & MCI_STATUS_MISCV)) - return 0; + return false; - if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) - return 0; + switch (m->cpuvendor) { + case X86_VENDOR_AMD: + return amd_mce_usable_address(m); - if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) - return 0; + case X86_VENDOR_INTEL: + case X86_VENDOR_ZHAOXIN: + return intel_mce_usable_address(m); - return 1; + default: + return true; + } } EXPORT_SYMBOL_GPL(mce_usable_address); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index f5323551c1a9..52bce533ddcc 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -536,3 +536,23 @@ bool intel_filter_mce(struct mce *m) return false; } + +/* + * Check if the address reported by the CPU is in a format we can parse. + * It would be possible to add code for most other cases, but all would + * be somewhat complicated (e.g. segment offset would require an instruction + * parser). So only support physical addresses up to page granularity for now. + */ +bool intel_mce_usable_address(struct mce *m) +{ + if (!(m->status & MCI_STATUS_MISCV)) + return false; + + if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) + return false; + + if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) + return false; + + return true; +} diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index bcf1b3c66c9c..e13a26c9c0ac 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -49,6 +49,7 @@ void intel_init_cmci(void); void intel_init_lmce(void); void intel_clear_lmce(void); bool intel_filter_mce(struct mce *m); +bool intel_mce_usable_address(struct mce *m); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } @@ -58,6 +59,7 @@ static inline void intel_init_cmci(void) { } static inline void intel_init_lmce(void) { } static inline void intel_clear_lmce(void) { } static inline bool intel_filter_mce(struct mce *m) { return false; } +static inline bool intel_mce_usable_address(struct mce *m) { return false; } #endif void mce_timer_kick(unsigned long interval); @@ -210,6 +212,7 @@ extern bool filter_mce(struct mce *m); #ifdef CONFIG_X86_MCE_AMD extern bool amd_filter_mce(struct mce *m); +bool amd_mce_usable_address(struct mce *m); /* * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits @@ -237,6 +240,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m) #else static inline bool amd_filter_mce(struct mce *m) { return false; } +static inline bool amd_mce_usable_address(struct mce *m) { return false; } static inline void smca_extract_err_addr(struct mce *m) { } #endif diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 030d3b409768..19e0681f0435 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -152,6 +152,7 @@ static inline void cache_alloc_hsw_probe(void) r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; + r->cache.arch_has_sparse_bitmasks = false; r->alloc_capable = true; rdt_alloc_capable = true; @@ -267,15 +268,18 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_1_eax eax; + union cpuid_0x10_x_ecx ecx; union cpuid_0x10_x_edx edx; - u32 ebx, ecx; + u32 ebx; - cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); + cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; r->cache.shareable_bits = ebx & r->default_ctrl; r->data_width = (r->cache.cbm_len + 3) / 4; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; } @@ -872,7 +876,6 @@ static __init void rdt_init_res_defs_intel(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_per_cpu_cfg = false; r->cache.min_cbm_bits = 1; } else if (r->rid == RDT_RESOURCE_MBA) { @@ -892,7 +895,7 @@ static __init void rdt_init_res_defs_amd(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = true; + r->cache.arch_has_sparse_bitmasks = true; r->cache.arch_has_per_cpu_cfg = true; r->cache.min_cbm_bits = 0; } else if (r->rid == RDT_RESOURCE_MBA) { diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b44c487727d4..beccb0e87ba7 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -87,10 +87,12 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, /* * Check whether a cache bit mask is valid. - * For Intel the SDM says: - * Please note that all (and only) contiguous '1' combinations - * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.). - * Additionally Haswell requires at least two bits set. + * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: + * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1 + * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1 + * + * Haswell does not support a non-contiguous 1s value and additionally + * requires at least two bits set. * AMD allows non-contiguous bitmasks. */ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) @@ -113,8 +115,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - /* Are non-contiguous bitmaps allowed? */ - if (!r->cache.arch_has_sparse_bitmaps && + /* Are non-contiguous bitmasks allowed? */ + if (!r->cache.arch_has_sparse_bitmasks && (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); return false; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 85ceaf9a31ac..a4f1aa15f0a2 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -59,6 +59,7 @@ struct rdt_fs_context { bool enable_cdpl2; bool enable_cdpl3; bool enable_mba_mbps; + bool enable_debug; }; static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) @@ -243,18 +244,17 @@ struct rdtgroup { */ #define RFTYPE_INFO BIT(0) #define RFTYPE_BASE BIT(1) -#define RF_CTRLSHIFT 4 -#define RF_MONSHIFT 5 -#define RF_TOPSHIFT 6 -#define RFTYPE_CTRL BIT(RF_CTRLSHIFT) -#define RFTYPE_MON BIT(RF_MONSHIFT) -#define RFTYPE_TOP BIT(RF_TOPSHIFT) +#define RFTYPE_CTRL BIT(4) +#define RFTYPE_MON BIT(5) +#define RFTYPE_TOP BIT(6) #define RFTYPE_RES_CACHE BIT(8) #define RFTYPE_RES_MB BIT(9) -#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) -#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) -#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) -#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) +#define RFTYPE_DEBUG BIT(10) +#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) +#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) +#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) +#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON) /* List of all resource groups */ extern struct list_head rdt_all_groups; @@ -270,7 +270,7 @@ void __exit rdtgroup_exit(void); * @mode: Access mode * @kf_ops: File operations * @flags: File specific RFTYPE_FLAGS_* flags - * @fflags: File specific RF_* or RFTYPE_* flags + * @fflags: File specific RFTYPE_* flags * @seq_show: Show content of the file * @write: Write to the file */ @@ -492,6 +492,15 @@ union cpuid_0x10_3_eax { unsigned int full; }; +/* CPUID.(EAX=10H, ECX=ResID).ECX */ +union cpuid_0x10_x_ecx { + struct { + unsigned int reserved:3; + unsigned int noncont:1; + } split; + unsigned int full; +}; + /* CPUID.(EAX=10H, ECX=ResID).EDX */ union cpuid_0x10_x_edx { struct { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ded1fc7cb7cb..f136ac046851 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -30,15 +30,15 @@ struct rmid_entry { struct list_head list; }; -/** - * @rmid_free_lru A least recently used list of free RMIDs +/* + * @rmid_free_lru - A least recently used list of free RMIDs * These RMIDs are guaranteed to have an occupancy less than the * threshold occupancy */ static LIST_HEAD(rmid_free_lru); -/** - * @rmid_limbo_count count of currently unused but (potentially) +/* + * @rmid_limbo_count - count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that * may have a occupancy value > resctrl_rmid_realloc_threshold. User can @@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru); */ static unsigned int rmid_limbo_count; -/** +/* * @rmid_entry - The entry in the limbo and free lists. */ static struct rmid_entry *rmid_ptrs; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 725344048f85..69a1de92384a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -54,8 +54,13 @@ static struct kernfs_node *kn_mondata; static struct seq_buf last_cmd_status; static char last_cmd_status_buf[512]; +static int rdtgroup_setup_root(struct rdt_fs_context *ctx); +static void rdtgroup_destroy_root(void); + struct dentry *debugfs_resctrl; +static bool resctrl_debug; + void rdt_last_cmd_clear(void) { lockdep_assert_held(&rdtgroup_mutex); @@ -696,11 +701,10 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdtgroup *rdtgrp; + char *pid_str; int ret = 0; pid_t pid; - if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) - return -EINVAL; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { rdtgroup_kn_unlock(of->kn); @@ -715,7 +719,27 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, goto unlock; } - ret = rdtgroup_move_task(pid, rdtgrp, of); + while (buf && buf[0] != '\0' && buf[0] != '\n') { + pid_str = strim(strsep(&buf, ",")); + + if (kstrtoint(pid_str, 0, &pid)) { + rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); + ret = -EINVAL; + break; + } + + if (pid < 0) { + rdt_last_cmd_printf("Invalid pid %d\n", pid); + ret = -EINVAL; + break; + } + + ret = rdtgroup_move_task(pid, rdtgrp, of); + if (ret) { + rdt_last_cmd_printf("Error while processing task %d\n", pid); + break; + } + } unlock: rdtgroup_kn_unlock(of->kn); @@ -755,6 +779,38 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +static int rdtgroup_closid_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + seq_printf(s, "%u\n", rdtgrp->closid); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +static int rdtgroup_rmid_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + seq_printf(s, "%u\n", rdtgrp->mon.rmid); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + #ifdef CONFIG_PROC_CPU_RESCTRL /* @@ -895,7 +951,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, return 0; } -/** +/* * rdt_bit_usage_show - Display current usage of resources * * A domain is a shared resource that can now be allocated differently. Here @@ -1117,12 +1173,24 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) } } +static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); + + return 0; +} + /** * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other * @r: Resource to which domain instance @d belongs. * @d: The domain instance for which @closid is being tested. * @cbm: Capacity bitmask being tested. * @closid: Intended closid for @cbm. + * @type: CDP type of @r. * @exclusive: Only check if overlaps with exclusive resource groups * * Checks if provided @cbm intended to be used for @closid on domain @@ -1209,6 +1277,7 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, /** * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive + * @rdtgrp: Resource group identified through its closid. * * An exclusive resource group implies that there should be no sharing of * its allocated resources. At the time this group is considered to be @@ -1251,9 +1320,8 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) return true; } -/** +/* * rdtgroup_mode_write - Modify the resource group's mode - * */ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) @@ -1357,12 +1425,11 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, return size; } -/** +/* * rdtgroup_size_show - Display size in bytes of allocated regions * * The "size" file mirrors the layout of the "schemata" file, printing the * size in bytes of each region instead of the capacity bitmask. - * */ static int rdtgroup_size_show(struct kernfs_open_file *of, struct seq_file *s, void *v) @@ -1686,77 +1753,77 @@ static struct rftype res_common_files[] = { .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_last_cmd_status_show, - .fflags = RF_TOP_INFO, + .fflags = RFTYPE_TOP_INFO, }, { .name = "num_closids", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_num_closids_show, - .fflags = RF_CTRL_INFO, + .fflags = RFTYPE_CTRL_INFO, }, { .name = "mon_features", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_mon_features_show, - .fflags = RF_MON_INFO, + .fflags = RFTYPE_MON_INFO, }, { .name = "num_rmids", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_num_rmids_show, - .fflags = RF_MON_INFO, + .fflags = RFTYPE_MON_INFO, }, { .name = "cbm_mask", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_default_ctrl_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, { .name = "min_cbm_bits", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_min_cbm_bits_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, { .name = "shareable_bits", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_shareable_bits_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, { .name = "bit_usage", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_bit_usage_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, { .name = "min_bandwidth", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_min_bw_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, { .name = "bandwidth_gran", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_bw_gran_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, { .name = "delay_linear", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_delay_linear_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, /* * Platform specific which (if any) capabilities are provided by @@ -1775,7 +1842,7 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .write = max_threshold_occ_write, .seq_show = max_threshold_occ_show, - .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, + .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, }, { .name = "mbm_total_bytes_config", @@ -1817,12 +1884,19 @@ static struct rftype res_common_files[] = { .fflags = RFTYPE_BASE, }, { + .name = "mon_hw_id", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdtgroup_rmid_show, + .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, + }, + { .name = "schemata", .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .write = rdtgroup_schemata_write, .seq_show = rdtgroup_schemata_show, - .fflags = RF_CTRL_BASE, + .fflags = RFTYPE_CTRL_BASE, }, { .name = "mode", @@ -1830,14 +1904,28 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .write = rdtgroup_mode_write, .seq_show = rdtgroup_mode_show, - .fflags = RF_CTRL_BASE, + .fflags = RFTYPE_CTRL_BASE, }, { .name = "size", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdtgroup_size_show, - .fflags = RF_CTRL_BASE, + .fflags = RFTYPE_CTRL_BASE, + }, + { + .name = "sparse_masks", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_has_sparse_bitmasks_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "ctrl_hw_id", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdtgroup_closid_show, + .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, }, }; @@ -1852,6 +1940,9 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) lockdep_assert_held(&rdtgroup_mutex); + if (resctrl_debug) + fflags |= RFTYPE_DEBUG; + for (rft = rfts; rft < rfts + len; rft++) { if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { ret = rdtgroup_add_file(kn, rft); @@ -1894,7 +1985,7 @@ void __init thread_throttle_mode_init(void) if (!rft) return; - rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB; + rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; } void __init mbm_config_rftype_init(const char *config) @@ -1903,7 +1994,7 @@ void __init mbm_config_rftype_init(const char *config) rft = rdtgroup_get_rftype_by_name(config); if (rft) - rft->fflags = RF_MON_INFO | RFTYPE_RES_CACHE; + rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; } /** @@ -2038,21 +2129,21 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); + ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); if (ret) goto out_destroy; /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = r->fflags | RF_CTRL_INFO; + fflags = r->fflags | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } for_each_mon_capable_rdt_resource(r) { - fflags = r->fflags | RF_MON_INFO; + fflags = r->fflags | RFTYPE_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); if (ret) @@ -2271,14 +2362,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) return 0; } -static void cdp_disable_all(void) -{ - if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); - if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); -} - /* * We don't allow rdtgroup directories to be created anywhere * except the root directory. Thus when looking for the rdtgroup @@ -2358,19 +2441,47 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **mon_data_kn); +static void rdt_disable_ctx(void) +{ + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); + set_mba_sc(false); + + resctrl_debug = false; +} + static int rdt_enable_ctx(struct rdt_fs_context *ctx) { int ret = 0; - if (ctx->enable_cdpl2) + if (ctx->enable_cdpl2) { ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); + if (ret) + goto out_done; + } - if (!ret && ctx->enable_cdpl3) + if (ctx->enable_cdpl3) { ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); + if (ret) + goto out_cdpl2; + } - if (!ret && ctx->enable_mba_mbps) + if (ctx->enable_mba_mbps) { ret = set_mba_sc(true); + if (ret) + goto out_cdpl3; + } + + if (ctx->enable_debug) + resctrl_debug = true; + return 0; + +out_cdpl3: + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); +out_cdpl2: + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); +out_done: return ret; } @@ -2463,6 +2574,7 @@ static void schemata_list_destroy(void) static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); + unsigned long flags = RFTYPE_CTRL_BASE; struct rdt_domain *dom; struct rdt_resource *r; int ret; @@ -2477,18 +2589,31 @@ static int rdt_get_tree(struct fs_context *fc) goto out; } + ret = rdtgroup_setup_root(ctx); + if (ret) + goto out; + ret = rdt_enable_ctx(ctx); - if (ret < 0) - goto out_cdp; + if (ret) + goto out_root; ret = schemata_list_create(); if (ret) { schemata_list_destroy(); - goto out_mba; + goto out_ctx; } closid_init(); + if (rdt_mon_capable) + flags |= RFTYPE_MON; + + ret = rdtgroup_add_files(rdtgroup_default.kn, flags); + if (ret) + goto out_schemata_free; + + kernfs_activate(rdtgroup_default.kn); + ret = rdtgroup_create_info_dir(rdtgroup_default.kn); if (ret < 0) goto out_schemata_free; @@ -2543,11 +2668,10 @@ out_info: kernfs_remove(kn_info); out_schemata_free: schemata_list_destroy(); -out_mba: - if (ctx->enable_mba_mbps) - set_mba_sc(false); -out_cdp: - cdp_disable_all(); +out_ctx: + rdt_disable_ctx(); +out_root: + rdtgroup_destroy_root(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); @@ -2559,6 +2683,7 @@ enum rdt_param { Opt_cdp, Opt_cdpl2, Opt_mba_mbps, + Opt_debug, nr__rdt_params }; @@ -2566,6 +2691,7 @@ static const struct fs_parameter_spec rdt_fs_parameters[] = { fsparam_flag("cdp", Opt_cdp), fsparam_flag("cdpl2", Opt_cdpl2), fsparam_flag("mba_MBps", Opt_mba_mbps), + fsparam_flag("debug", Opt_debug), {} }; @@ -2591,6 +2717,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; ctx->enable_mba_mbps = true; return 0; + case Opt_debug: + ctx->enable_debug = true; + return 0; } return -EINVAL; @@ -2618,7 +2747,6 @@ static int rdt_init_fs_context(struct fs_context *fc) if (!ctx) return -ENOMEM; - ctx->kfc.root = rdt_root; ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; fc->fs_private = &ctx->kfc; fc->ops = &rdt_fs_context_ops; @@ -2779,16 +2907,16 @@ static void rdt_kill_sb(struct super_block *sb) cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - set_mba_sc(false); + rdt_disable_ctx(); /*Put everything back to default values. */ for_each_alloc_capable_rdt_resource(r) reset_all_ctrls(r); - cdp_disable_all(); rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; schemata_list_destroy(); + rdtgroup_destroy_root(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); static_branch_disable_cpuslocked(&rdt_enable_key); @@ -3170,8 +3298,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, enum rdt_group_type rtype, struct rdtgroup **r) { struct rdtgroup *prdtgrp, *rdtgrp; + unsigned long files = 0; struct kernfs_node *kn; - uint files = 0; int ret; prdtgrp = rdtgroup_kn_lock_live(parent_kn); @@ -3223,7 +3351,14 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_destroy; } - files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); + if (rtype == RDTCTRL_GROUP) { + files = RFTYPE_BASE | RFTYPE_CTRL; + if (rdt_mon_capable) + files |= RFTYPE_MON; + } else { + files = RFTYPE_BASE | RFTYPE_MON; + } + ret = rdtgroup_add_files(kn, files); if (ret) { rdt_last_cmd_puts("kernfs fill error\n"); @@ -3656,6 +3791,9 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) seq_puts(seq, ",mba_MBps"); + if (resctrl_debug) + seq_puts(seq, ",debug"); + return 0; } @@ -3666,10 +3804,8 @@ static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { .show_options = rdtgroup_show_options, }; -static int __init rdtgroup_setup_root(void) +static int rdtgroup_setup_root(struct rdt_fs_context *ctx) { - int ret; - rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, KERNFS_ROOT_CREATE_DEACTIVATED | KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, @@ -3677,6 +3813,20 @@ static int __init rdtgroup_setup_root(void) if (IS_ERR(rdt_root)) return PTR_ERR(rdt_root); + ctx->kfc.root = rdt_root; + rdtgroup_default.kn = kernfs_root_to_node(rdt_root); + + return 0; +} + +static void rdtgroup_destroy_root(void) +{ + kernfs_destroy_root(rdt_root); + rdtgroup_default.kn = NULL; +} + +static void __init rdtgroup_setup_default(void) +{ mutex_lock(&rdtgroup_mutex); rdtgroup_default.closid = 0; @@ -3686,19 +3836,7 @@ static int __init rdtgroup_setup_root(void) list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); - ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE); - if (ret) { - kernfs_destroy_root(rdt_root); - goto out; - } - - rdtgroup_default.kn = kernfs_root_to_node(rdt_root); - kernfs_activate(rdtgroup_default.kn); - -out: mutex_unlock(&rdtgroup_mutex); - - return ret; } static void domain_destroy_mon_state(struct rdt_domain *d) @@ -3820,13 +3958,11 @@ int __init rdtgroup_init(void) seq_buf_init(&last_cmd_status, last_cmd_status_buf, sizeof(last_cmd_status_buf)); - ret = rdtgroup_setup_root(); - if (ret) - return ret; + rdtgroup_setup_default(); ret = sysfs_create_mount_point(fs_kobj, "resctrl"); if (ret) - goto cleanup_root; + return ret; ret = register_filesystem(&rdt_fs_type); if (ret) @@ -3859,8 +3995,6 @@ int __init rdtgroup_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); -cleanup_root: - kernfs_destroy_root(rdt_root); return ret; } @@ -3870,5 +4004,4 @@ void __exit rdtgroup_exit(void) debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); sysfs_remove_mount_point(fs_kobj, "resctrl"); - kernfs_destroy_root(rdt_root); } diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 87d38f17ff5c..afd09924094e 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -278,7 +278,7 @@ static void __init dtb_apic_setup(void) } #ifdef CONFIG_OF_EARLY_FLATTREE -static void __init x86_flattree_get_config(void) +void __init x86_flattree_get_config(void) { u32 size, map_len; void *dt; @@ -300,14 +300,10 @@ static void __init x86_flattree_get_config(void) unflatten_and_copy_device_tree(); early_memunmap(dt, map_len); } -#else -static inline void x86_flattree_get_config(void) { } #endif void __init x86_dtb_init(void) { - x86_flattree_get_config(); - if (!of_have_populated_dt()) return; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a86d37052a64..a21a4d0ecc34 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, - unsigned int size, u32 pkru) + unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { - __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); + __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, + XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cadf68737e6b..117e74c44e75 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy + * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * @@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode) + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; @@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= fpstate->user_xfeatures; + header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } @@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, + tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } @@ -1536,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - - if (!guest_fpu) - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; - + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ @@ -1736,7 +1736,6 @@ EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); /** * fpu_xstate_prctl - xstate permission operations - * @tsk: Redundant pointer to current * @option: A subfunction of arch_prctl() * @arg2: option argument * Return: 0 if successful; otherwise, an error code diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index a4ecb04d8d64..3518fb26d06b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -43,7 +43,8 @@ enum xstate_copy_mode { struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode); + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 30a55207c000..c20d1832c481 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -32,6 +32,7 @@ */ static void init_8259A(int auto_eoi); +static bool pcat_compat __ro_after_init; static int i8259A_auto_eoi; DEFINE_RAW_SPINLOCK(i8259A_lock); @@ -299,15 +300,32 @@ static void unmask_8259A(void) static int probe_8259A(void) { + unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR); unsigned long flags; - unsigned char probe_val = ~(1 << PIC_CASCADE_IR); - unsigned char new_val; + + /* + * If MADT has the PCAT_COMPAT flag set, then do not bother probing + * for the PIC. Some BIOSes leave the PIC uninitialized and probing + * fails. + * + * Right now this causes problems as quite some code depends on + * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly + * when the system has an IO/APIC because then PIC is not required + * at all, except for really old machines where the timer interrupt + * must be routed through the PIC. So just pretend that the PIC is + * there and let legacy_pic->init() initialize it for nothing. + * + * Alternatively this could just try to initialize the PIC and + * repeat the probe, but for cases where there is no PIC that's + * just pointless. + */ + if (pcat_compat) + return nr_legacy_irqs(); + /* - * Check to see if we have a PIC. - * Mask all except the cascade and read - * back the value we just wrote. If we don't - * have a PIC, we will read 0xff as opposed to the - * value we wrote. + * Check to see if we have a PIC. Mask all except the cascade and + * read back the value we just wrote. If we don't have a PIC, we + * will read 0xff as opposed to the value we wrote. */ raw_spin_lock_irqsave(&i8259A_lock, flags); @@ -429,5 +447,9 @@ static int __init i8259A_init_ops(void) return 0; } - device_initcall(i8259A_init_ops); + +void __init legacy_pic_pcat_compat(void) +{ + pcat_compat = true; +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b098b1fa2470..3e05ecf8cf8d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1217,6 +1217,8 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); + x86_flattree_get_config(); + initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index dcf325b7b022..ccb0915e84e1 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -632,6 +632,23 @@ fail: sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } +static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt, + unsigned long address, + bool write) +{ + if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = X86_PF_USER; + ctxt->fi.cr2 = address; + if (write) + ctxt->fi.error_code |= X86_PF_WRITE; + + return ES_EXCEPTION; + } + + return ES_OK; +} + static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, void *src, char *buf, unsigned int data_size, @@ -639,7 +656,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, bool backwards) { int i, b = backwards ? -1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)src; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, false); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *s = src + (i * data_size * b); @@ -660,7 +682,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, bool backwards) { int i, s = backwards ? -1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)dst; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, true); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *d = dst + (i * data_size * s); @@ -696,6 +723,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; + size_t size; + u64 port; + *exitinfo = 0; switch (insn->opcode.bytes[0]) { @@ -704,7 +734,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ @@ -712,41 +742,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } + *exitinfo |= port << 16; + switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: @@ -756,12 +788,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; + size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; + size = (insn->opnd_bytes == 2) ? 2 : 4; } + switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; @@ -777,7 +812,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; - return ES_OK; + return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index d8c1e3be74c0..6395bfd87b68 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + BUG_ON(size > 4); + + if (user_mode(ctxt->regs)) { + struct thread_struct *t = ¤t->thread; + struct io_bitmap *iobm = t->io_bitmap; + size_t idx; + + if (!iobm) + goto fault; + + for (idx = port; idx < port + size; ++idx) { + if (test_bit(idx, iobm->bitmap)) + goto fault; + } + } + + return ES_OK; + +fault: + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + + return ES_EXCEPTION; +} + /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" @@ -1508,6 +1535,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ES_DECODE_FAILED; } + if (user_mode(ctxt->regs)) + return ES_UNSUPPORTED; + switch (mmio) { case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 6eb06d001bcc..96a771f9f930 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * Disable virtualization, APIC etc. and park the CPU in a HLT loop + * this function calls the 'stop' function on all other CPUs in the system. */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait) * 2) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * 3) If the system uses INIT/STARTUP for CPU bringup, then - * send all present CPUs an INIT vector, which brings them - * completely out of the way. + * 3) If #2 timed out send an NMI to the CPUs which did not + * yet report * - * 4) If #3 is not possible and #2 timed out send an NMI to the - * CPUs which did not yet report - * - * 5) Wait for all other CPUs to report that they reached the + * 4) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * #4 can obviously race against a CPU reaching the HLT loop late. + * #3 can obviously race against a CPU reaching the HLT loop late. * That CPU will have reported already and the "have all CPUs * reached HLT" condition will be true despite the fact that the * other CPU is still handling the NMI. Again, there is no @@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait) /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI/INIT shutdown in case that not all + * prevent an NMI shutdown attempt in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; @@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait) udelay(1); } - /* - * Park all other CPUs in INIT including "offline" CPUs, if - * possible. That's a safe place where they can't resume execution - * of HLT and then execute the HLT loop from overwritten text or - * page tables. - * - * The only downside is a broadcast MCE, but up to the point where - * the kexec() kernel brought all APs online again an MCE will just - * make HLT resume and handle the MCE. The machine crashes and burns - * due to overwritten text, page tables and data. So there is a - * choice between fire and frying pan. The result is pretty much - * the same. Chose frying pan until x86 provides a sane mechanism - * to park a CPU. - */ - if (smp_park_other_cpus_in_init()) - goto done; - - /* - * If park with INIT was not possible and the REBOOT_VECTOR didn't - * take all secondary CPUs offline, try with the NMI. - */ + /* if the REBOOT_VECTOR didn't work, try with the NMI */ if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler @@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait) udelay(1); } -done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 48e040618731..2a187c0cbd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } -bool smp_park_other_cpus_in_init(void) -{ - unsigned int cpu, this_cpu = smp_processor_id(); - unsigned int apicid; - - if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) - return false; - - /* - * If this is a crash stop which does not execute on the boot CPU, - * then this cannot use the INIT mechanism because INIT to the boot - * CPU will reset the machine. - */ - if (this_cpu) - return false; - - for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { - if (cpu == this_cpu) - continue; - apicid = apic->cpu_present_to_apicid(cpu); - if (apicid == BAD_APICID) - continue; - send_init_sequence(apicid); - } - return true; -} - /* * Early setup to make printk work. */ diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index bbc440c93e08..1123ef3ccf90 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -15,6 +15,7 @@ * ( The serial nature of the boot logic and the CPU hotplug lock * protects against more than 2 CPUs entering this code. ) */ +#include <linux/workqueue.h> #include <linux/topology.h> #include <linux/spinlock.h> #include <linux/kernel.h> @@ -342,6 +343,13 @@ static inline unsigned int loop_timeout(int cpu) return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20; } +static void tsc_sync_mark_tsc_unstable(struct work_struct *work) +{ + mark_tsc_unstable("check_tsc_sync_source failed"); +} + +static DECLARE_WORK(tsc_sync_work, tsc_sync_mark_tsc_unstable); + /* * The freshly booted CPU initiates this via an async SMP function call. */ @@ -395,7 +403,7 @@ retry: "turning off TSC clock.\n", max_warp); if (random_warps) pr_warn("TSC warped randomly between CPUs\n"); - mark_tsc_unstable("check_tsc_sync_source failed"); + schedule_work(&tsc_sync_work); } /* diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f15fb71f280e..54a5596adaa6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -139,10 +139,7 @@ SECTIONS STATIC_CALL_TEXT ALIGN_ENTRY_TEXT_BEGIN -#ifdef CONFIG_CPU_SRSO *(.text..__x86.rethunk_untrain) -#endif - ENTRY_TEXT #ifdef CONFIG_CPU_SRSO @@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_CPU_UNRET_ENTRY . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); -. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); #endif #ifdef CONFIG_CPU_SRSO +. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); /* * GNU ld cannot do XOR until 2.41. * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0544e30b4946..773132c3bf5a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - /* - * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if - * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't - * supported by the host. - */ - vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | - XFEATURE_MASK_FPSSE; - kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index dcd60b39e794..3e977dbbf993 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_lapic_get_reg(apic, lvt_type); int vector, mode, trig_mode; + int r; if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode, - NULL); + + r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); + if (r && lvt_type == APIC_LVTPC) + kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); + return r; } return 0; } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index edb89b51b383..9ae07db6f0f6 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static void kvm_pmi_trigger_fn(struct irq_work *irq_work) -{ - struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - kvm_pmu_deliver_pmi(vcpu); -} - static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - if (!pmc->intr || skip_pmi) - return; - - /* - * Inject PMI. If vcpu was in a guest mode during NMI PMI - * can be ejected on a guest mode re-entry. Otherwise we can't - * be sure that vcpu wasn't executing hlt instruction at the - * time of vmexit and is not going to re-enter guest mode until - * woken up. So we should wake it, but this is impossible from - * NMI context. Do it from irq work instead. - */ - if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu)) - irq_work_queue(&pmc_to_pmu(pmc)->irq_work); - else + if (pmc->intr && !skip_pmi) kvm_make_request(KVM_REQ_PMI, pmc->vcpu); } @@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) void kvm_pmu_reset(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - - irq_work_sync(&pmu->irq_work); static_call(kvm_x86_pmu_reset)(vcpu); } @@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); static_call(kvm_x86_pmu_init)(vcpu); - init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); pmu->event_count = 0; pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7d9ba301c090..1d64113de488 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } +static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val) +{ + pmc->counter += val - pmc_read_counter(pmc); + pmc->counter &= pmc_bitmask(pmc); +} + static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 2092db892d7d..4b74ea91f4e6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); break; + case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR: + /* Invalid IPI with vector < 16 */ + break; default: - pr_err("Unknown IPI interception\n"); + vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n"); } return 1; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index dd496c9e5f91..3fea8c47679e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu) nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); + + if (kvm_apicv_activated(vcpu->kvm)) + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cef5a3d0abd0..373ff6a6687b 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410..ded1d80d72cb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -531,8 +531,6 @@ static bool __kvm_is_svm_supported(void) int cpu = smp_processor_id(); struct cpuinfo_x86 *c = &cpu_data(cpu); - u64 vm_cr; - if (c->x86_vendor != X86_VENDOR_AMD && c->x86_vendor != X86_VENDOR_HYGON) { pr_err("CPU %d isn't AMD or Hygon\n", cpu); @@ -549,12 +547,6 @@ static bool __kvm_is_svm_supported(void) return false; } - rdmsrl(MSR_VM_CR, vm_cr); - if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) { - pr_err("SVM disabled (by BIOS) in MSR_VM_CR on CPU %d\n", cpu); - return false; - } - return true; } @@ -691,7 +683,7 @@ static int svm_hardware_enable(void) */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { struct sev_es_save_area *hostsa; - u32 msr_hi; + u32 __maybe_unused msr_hi; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); @@ -913,8 +905,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) if (intercept == svm->x2avic_msrs_intercepted) return; - if (!x2avic_enabled || - !apic_x2apic_mode(svm->vcpu.arch.apic)) + if (!x2avic_enabled) return; for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f2efa0bf7ae8..820d3e1f6b4f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..41cce5031126 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5382,26 +5382,37 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) -{ - if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; - - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - guest_xsave->region, - sizeof(guest_xsave->region), - vcpu->arch.pkru); -} static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, u8 *state, unsigned int size) { + /* + * Only copy state for features that are enabled for the guest. The + * state itself isn't problematic, but setting bits in the header for + * features that are supported in *this* host but not exposed to the + * guest can result in KVM_SET_XSAVE failing when live migrating to a + * compatible host without the features that are NOT exposed to the + * guest. + * + * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if + * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't + * supported by the host. + */ + u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | + XFEATURE_MASK_FPSSE; + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - state, size, vcpu->arch.pkru); + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, + supported_xcr0, vcpu->arch.pkru); +} + +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) +{ + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, @@ -12843,6 +12854,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; #endif + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || kvm_guest_apic_has_interrupt(vcpu))) diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c index 80efd45a7761..6e8b7e600def 100644 --- a/arch/x86/lib/copy_mc.c +++ b/arch/x86/lib/copy_mc.c @@ -70,23 +70,23 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne } EXPORT_SYMBOL_GPL(copy_mc_to_kernel); -unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len) +unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len) { unsigned long ret; if (copy_mc_fragile_enabled) { __uaccess_begin(); - ret = copy_mc_fragile(dst, src, len); + ret = copy_mc_fragile((__force void *)dst, src, len); __uaccess_end(); return ret; } if (static_cpu_has(X86_FEATURE_ERMS)) { __uaccess_begin(); - ret = copy_mc_enhanced_fast_string(dst, src, len); + ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len); __uaccess_end(); return ret; } - return copy_user_generic(dst, src, len); + return copy_user_generic((__force void *)dst, src, len); } diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cd86aeb5fdd3..a48077c5ca61 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -126,11 +126,19 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) #include <asm/GEN-for-each-reg.h> #undef GEN #endif + +#ifdef CONFIG_RETHUNK + /* - * This function name is magical and is used by -mfunction-return=thunk-extern - * for the compiler to generate JMPs to it. + * Be careful here: that label cannot really be removed because in + * some configurations and toolchains, the JMP __x86_return_thunk the + * compiler issues is either a short one or the compiler doesn't use + * relocations for same-section JMPs and that breaks the returns + * detection logic in apply_returns() and in objtool. */ -#ifdef CONFIG_RETHUNK + .section .text..__x86.return_thunk + +#ifdef CONFIG_CPU_SRSO /* * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at @@ -147,29 +155,18 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) * * As a result, srso_alias_safe_ret() becomes a safe return. */ -#ifdef CONFIG_CPU_SRSO - .section .text..__x86.rethunk_untrain - -SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .pushsection .text..__x86.rethunk_untrain +SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) UNWIND_HINT_FUNC ANNOTATE_NOENDBR ASM_NOP2 lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) -__EXPORT_THUNK(srso_alias_untrain_ret) - - .section .text..__x86.rethunk_safe -#else -/* dummy definition for alternatives */ -SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) - ANNOTATE_UNRET_SAFE - ret - int3 -SYM_FUNC_END(srso_alias_untrain_ret) -#endif + .popsection -SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + .pushsection .text..__x86.rethunk_safe +SYM_CODE_START_NOALIGN(srso_alias_safe_ret) lea 8(%_ASM_SP), %_ASM_SP UNWIND_HINT_FUNC ANNOTATE_UNRET_SAFE @@ -177,14 +174,63 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) int3 SYM_FUNC_END(srso_alias_safe_ret) - .section .text..__x86.return_thunk - -SYM_CODE_START(srso_alias_return_thunk) +SYM_CODE_START_NOALIGN(srso_alias_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR call srso_alias_safe_ret ud2 SYM_CODE_END(srso_alias_return_thunk) + .popsection + +/* + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccc30824648d48,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. + */ + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret) + ANNOTATE_NOENDBR + .byte 0x48, 0xb8 + +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + lea 8(%_ASM_SP), %_ASM_SP + ret + int3 + int3 + /* end of movabs */ + lfence + call srso_safe_ret + ud2 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) + +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" +#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" +#else /* !CONFIG_CPU_SRSO */ +#define JMP_SRSO_UNTRAIN_RET "ud2" +#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" +#endif /* CONFIG_CPU_SRSO */ + +#ifdef CONFIG_CPU_UNRET_ENTRY /* * Some generic notes on the untraining sequences: @@ -216,7 +262,7 @@ SYM_CODE_END(srso_alias_return_thunk) */ .align 64 .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc -SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret) ANNOTATE_NOENDBR /* * As executed from retbleed_untrain_ret, this is: @@ -264,72 +310,27 @@ SYM_CODE_END(retbleed_return_thunk) jmp retbleed_return_thunk int3 SYM_FUNC_END(retbleed_untrain_ret) -__EXPORT_THUNK(retbleed_untrain_ret) -/* - * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() - * above. On kernel entry, srso_untrain_ret() is executed which is a - * - * movabs $0xccccc30824648d48,%rax - * - * and when the return thunk executes the inner label srso_safe_ret() - * later, it is a stack manipulation and a RET which is mispredicted and - * thus a "safe" one to use. - */ - .align 64 - .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc -SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) - ANNOTATE_NOENDBR - .byte 0x48, 0xb8 +#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" +#else /* !CONFIG_CPU_UNRET_ENTRY */ +#define JMP_RETBLEED_UNTRAIN_RET "ud2" +#endif /* CONFIG_CPU_UNRET_ENTRY */ -/* - * This forces the function return instruction to speculate into a trap - * (UD2 in srso_return_thunk() below). This RET will then mispredict - * and execution will continue at the return site read from the top of - * the stack. - */ -SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) - lea 8(%_ASM_SP), %_ASM_SP - ret - int3 - int3 - /* end of movabs */ - lfence - call srso_safe_ret - ud2 -SYM_CODE_END(srso_safe_ret) -SYM_FUNC_END(srso_untrain_ret) -__EXPORT_THUNK(srso_untrain_ret) - -SYM_CODE_START(srso_return_thunk) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - call srso_safe_ret - ud2 -SYM_CODE_END(srso_return_thunk) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ + JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ + JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -SYM_CODE_START(__x86_return_thunk) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret - int3 -SYM_CODE_END(__x86_return_thunk) -EXPORT_SYMBOL(__x86_return_thunk) - -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ #ifdef CONFIG_CALL_DEPTH_TRACKING .align 64 -SYM_FUNC_START(__x86_return_skl) +SYM_FUNC_START(call_depth_return_thunk) ANNOTATE_NOENDBR /* * Keep the hotpath in a 16byte I-fetch for the non-debug @@ -356,6 +357,33 @@ SYM_FUNC_START(__x86_return_skl) ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(__x86_return_skl) +SYM_FUNC_END(call_depth_return_thunk) #endif /* CONFIG_CALL_DEPTH_TRACKING */ + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + * + * This code is only used during kernel boot or module init. All + * 'JMP __x86_return_thunk' sites are changed to something else by + * apply_returns(). + * + * This should be converted eventually to call a warning function which + * should scream loudly when the default return thunk is called after + * alternatives have been applied. + * + * That warning function cannot BUG() because the bug splat cannot be + * displayed in all possible configurations, leading to users not really + * knowing why the machine froze. + */ +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(__x86_return_thunk) +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2aadb2019b4f..c79f12e449ea 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -3,6 +3,7 @@ #include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/string.h> #include <linux/init.h> #include <linux/memblock.h> @@ -733,6 +734,8 @@ void __init x86_numa_init(void) if (!numa_init(amd_numa_init)) return; #endif + if (acpi_disabled && !numa_init(of_numa_init)) + return; } numa_init(dummy_numa_init); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 78414c6d1b5e..5dd733944629 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -69,6 +69,7 @@ static void __init pti_print_if_secure(const char *reason) pr_info("%s\n", reason); } +/* Assume mode is auto unless overridden via cmdline below. */ static enum pti_mode { PTI_AUTO = 0, PTI_FORCE_OFF, @@ -77,50 +78,49 @@ static enum pti_mode { void __init pti_check_boottime_disable(void) { - char arg[5]; - int ret; - - /* Assume mode is auto unless overridden. */ - pti_mode = PTI_AUTO; - if (hypervisor_is_type(X86_HYPER_XEN_PV)) { pti_mode = PTI_FORCE_OFF; pti_print_if_insecure("disabled on XEN PV."); return; } - ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); - if (ret > 0) { - if (ret == 3 && !strncmp(arg, "off", 3)) { - pti_mode = PTI_FORCE_OFF; - pti_print_if_insecure("disabled on command line."); - return; - } - if (ret == 2 && !strncmp(arg, "on", 2)) { - pti_mode = PTI_FORCE_ON; - pti_print_if_secure("force enabled on command line."); - goto enable; - } - if (ret == 4 && !strncmp(arg, "auto", 4)) { - pti_mode = PTI_AUTO; - goto autosel; - } - } - - if (cmdline_find_option_bool(boot_command_line, "nopti") || - cpu_mitigations_off()) { + if (cpu_mitigations_off()) pti_mode = PTI_FORCE_OFF; + if (pti_mode == PTI_FORCE_OFF) { pti_print_if_insecure("disabled on command line."); return; } -autosel: - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (pti_mode == PTI_FORCE_ON) + pti_print_if_secure("force enabled on command line."); + + if (pti_mode == PTI_AUTO && !boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return; -enable: + setup_force_cpu_cap(X86_FEATURE_PTI); } +static int __init pti_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + pti_mode = PTI_FORCE_OFF; + else if (!strcmp(arg, "on")) + pti_mode = PTI_FORCE_ON; + else if (!strcmp(arg, "auto")) + pti_mode = PTI_AUTO; + else + return -EINVAL; + return 0; +} +early_param("pti", pti_parse_cmdline); + +static int __init pti_parse_cmdline_nopti(char *arg) +{ + pti_mode = PTI_FORCE_OFF; + return 0; +} +early_param("nopti", pti_parse_cmdline_nopti); + pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { /* diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 45d0c17ce77c..e03207de2880 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/clocksource.h> #include <asm/apic.h> @@ -178,49 +179,56 @@ module_param_named(debug, uv_nmi_debug, int, 0644); } while (0) /* Valid NMI Actions */ -#define ACTION_LEN 16 -static struct nmi_action { - char *action; - char *desc; -} valid_acts[] = { - { "kdump", "do kernel crash dump" }, - { "dump", "dump process stack for each cpu" }, - { "ips", "dump Inst Ptr info for each cpu" }, - { "kdb", "enter KDB (needs kgdboc= assignment)" }, - { "kgdb", "enter KGDB (needs gdb target remote)" }, - { "health", "check if CPUs respond to NMI" }, +enum action_t { + nmi_act_kdump, + nmi_act_dump, + nmi_act_ips, + nmi_act_kdb, + nmi_act_kgdb, + nmi_act_health, + nmi_act_max }; -typedef char action_t[ACTION_LEN]; -static action_t uv_nmi_action = { "dump" }; + +static const char * const actions[nmi_act_max] = { + [nmi_act_kdump] = "kdump", + [nmi_act_dump] = "dump", + [nmi_act_ips] = "ips", + [nmi_act_kdb] = "kdb", + [nmi_act_kgdb] = "kgdb", + [nmi_act_health] = "health", +}; + +static const char * const actions_desc[nmi_act_max] = { + [nmi_act_kdump] = "do kernel crash dump", + [nmi_act_dump] = "dump process stack for each cpu", + [nmi_act_ips] = "dump Inst Ptr info for each cpu", + [nmi_act_kdb] = "enter KDB (needs kgdboc= assignment)", + [nmi_act_kgdb] = "enter KGDB (needs gdb target remote)", + [nmi_act_health] = "check if CPUs respond to NMI", +}; + +static enum action_t uv_nmi_action = nmi_act_dump; static int param_get_action(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "%s\n", uv_nmi_action); + return sprintf(buffer, "%s\n", actions[uv_nmi_action]); } static int param_set_action(const char *val, const struct kernel_param *kp) { - int i; - int n = ARRAY_SIZE(valid_acts); - char arg[ACTION_LEN]; - - /* (remove possible '\n') */ - strscpy(arg, val, strnchrnul(val, sizeof(arg)-1, '\n') - val + 1); - - for (i = 0; i < n; i++) - if (!strcmp(arg, valid_acts[i].action)) - break; + int i, n = ARRAY_SIZE(actions); - if (i < n) { - strscpy(uv_nmi_action, arg, sizeof(uv_nmi_action)); - pr_info("UV: New NMI action:%s\n", uv_nmi_action); + i = sysfs_match_string(actions, val); + if (i >= 0) { + uv_nmi_action = i; + pr_info("UV: New NMI action:%s\n", actions[i]); return 0; } - pr_err("UV: Invalid NMI action:%s, valid actions are:\n", arg); + pr_err("UV: Invalid NMI action. Valid actions are:\n"); for (i = 0; i < n; i++) - pr_err("UV: %-8s - %s\n", - valid_acts[i].action, valid_acts[i].desc); + pr_err("UV: %-8s - %s\n", actions[i], actions_desc[i]); + return -EINVAL; } @@ -228,15 +236,10 @@ static const struct kernel_param_ops param_ops_action = { .get = param_get_action, .set = param_set_action, }; -#define param_check_action(name, p) __param_check(name, p, action_t) +#define param_check_action(name, p) __param_check(name, p, enum action_t) module_param_named(action, uv_nmi_action, action, 0644); -static inline bool uv_nmi_action_is(const char *action) -{ - return (strncmp(uv_nmi_action, action, strlen(action)) == 0); -} - /* Setup which NMI support is present in system */ static void uv_nmi_setup_mmrs(void) { @@ -727,10 +730,10 @@ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) if (cpu == 0) uv_nmi_dump_cpu_ip_hdr(); - if (current->pid != 0 || !uv_nmi_action_is("ips")) + if (current->pid != 0 || uv_nmi_action != nmi_act_ips) uv_nmi_dump_cpu_ip(cpu, regs); - if (uv_nmi_action_is("dump")) { + if (uv_nmi_action == nmi_act_dump) { pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu); show_regs(regs); } @@ -798,7 +801,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) int saved_console_loglevel = console_loglevel; pr_alert("UV: tracing %s for %d CPUs from CPU %d\n", - uv_nmi_action_is("ips") ? "IPs" : "processes", + uv_nmi_action == nmi_act_ips ? "IPs" : "processes", atomic_read(&uv_nmi_cpus_in_nmi), cpu); console_loglevel = uv_nmi_loglevel; @@ -874,7 +877,7 @@ static inline int uv_nmi_kdb_reason(void) static inline int uv_nmi_kdb_reason(void) { /* Ensure user is expecting to attach gdb remote */ - if (uv_nmi_action_is("kgdb")) + if (uv_nmi_action == nmi_act_kgdb) return 0; pr_err("UV: NMI error: KDB is not enabled in this kernel\n"); @@ -950,28 +953,35 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) master = (atomic_read(&uv_nmi_cpu) == cpu); /* If NMI action is "kdump", then attempt to do it */ - if (uv_nmi_action_is("kdump")) { + if (uv_nmi_action == nmi_act_kdump) { uv_nmi_kdump(cpu, master, regs); /* Unexpected return, revert action to "dump" */ if (master) - strscpy(uv_nmi_action, "dump", sizeof(uv_nmi_action)); + uv_nmi_action = nmi_act_dump; } /* Pause as all CPU's enter the NMI handler */ uv_nmi_wait(master); /* Process actions other than "kdump": */ - if (uv_nmi_action_is("health")) { + switch (uv_nmi_action) { + case nmi_act_health: uv_nmi_action_health(cpu, regs, master); - } else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) { + break; + case nmi_act_ips: + case nmi_act_dump: uv_nmi_dump_state(cpu, regs, master); - } else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) { + break; + case nmi_act_kdb: + case nmi_act_kgdb: uv_call_kgdb_kdb(cpu, regs, master); - } else { + break; + default: if (master) - pr_alert("UV: unknown NMI action: %s\n", uv_nmi_action); + pr_alert("UV: unknown NMI action: %d\n", uv_nmi_action); uv_nmi_sync_exit(master); + break; } /* Clear per_cpu "in_nmi" flag */ diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 54663f3e00cb..ff5afc8a5a41 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -53,7 +53,7 @@ struct uv_rtc_timer_head { struct { int lcpu; /* systemwide logical cpu number */ u64 expires; /* next timer expiration for this cpu */ - } cpu[]; + } cpu[] __counted_by(ncpus); }; /* |