From fe18894930a025617114aa8ca0adbf94d5bffe89 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 25 Apr 2022 16:41:00 +0800 Subject: iio: mma8452: fix probe fail when device tree compatible is used. Correct the logic for the probe. First check of_match_table, if not meet, then check i2c_driver.id_table. If both not meet, then return fail. Fixes: a47ac019e7e8 ("iio: mma8452: Fix probe failing when an i2c_device_id is used") Signed-off-by: Haibo Chen Link: https://lore.kernel.org/r/1650876060-17577-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 9c02c681c84c..4156d216c640 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1556,11 +1556,13 @@ static int mma8452_probe(struct i2c_client *client, mutex_init(&data->lock); data->chip_info = device_get_match_data(&client->dev); - if (!data->chip_info && id) { - data->chip_info = &mma_chip_info_table[id->driver_data]; - } else { - dev_err(&client->dev, "unknown device model\n"); - return -ENODEV; + if (!data->chip_info) { + if (id) { + data->chip_info = &mma_chip_info_table[id->driver_data]; + } else { + dev_err(&client->dev, "unknown device model\n"); + return -ENODEV; + } } ret = iio_read_mount_matrix(&client->dev, &data->orientation); -- cgit v1.2.3 From 048058399f19d43cf21de9f5d36cd8144337d004 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 6 May 2022 11:50:40 +0200 Subject: iio: adc: axp288: Override TS pin bias current for some models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 9bcf15f75cac ("iio: adc: axp288: Fix TS-pin handling") we preserve the bias current set by the firmware at boot. This fixes issues we were seeing on various models. Some models like the Nuvision Solo 10 Draw tablet actually need the old hardcoded 80ųA bias current for battery temperature monitoring to work properly. Add a quirk entry for the Nuvision Solo 10 Draw to the DMI quirk table to restore setting the bias current to 80ųA on this model. Fixes: 9bcf15f75cac ("iio: adc: axp288: Fix TS-pin handling") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215882 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220506095040.21008-1-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp288_adc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index a4b8be5b8f88..580361bd9849 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -196,6 +196,14 @@ static const struct dmi_system_id axp288_adc_ts_bias_override[] = { }, .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, }, + { + /* Nuvision Solo 10 Draw */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TMAX"), + DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"), + }, + .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, + }, {} }; -- cgit v1.2.3 From bb52d3691db8cf24cea049235223f3599778f264 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 1 May 2022 21:50:29 +0200 Subject: iio: magnetometer: yas530: Fix memchr_inv() misuse The call to check if the calibration is all zeroes is doing it wrong: memchr_inv() returns NULL if the the calibration contains all zeroes, but the check is for != NULL. Fix it up. It's probably not an urgent fix because the inner check for BIT(7) in data[13] will save us. But fix it. Fixes: de8860b1ed47 ("iio: magnetometer: Add driver for Yamaha YAS530") Reported-by: Jakob Hauser Cc: Andy Shevchenko Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220501195029.151852-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/yamaha-yas530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c index 9ff7b0e56cf6..b2bc637150bf 100644 --- a/drivers/iio/magnetometer/yamaha-yas530.c +++ b/drivers/iio/magnetometer/yamaha-yas530.c @@ -639,7 +639,7 @@ static int yas532_get_calibration_data(struct yas5xx *yas5xx) dev_dbg(yas5xx->dev, "calibration data: %*ph\n", 14, data); /* Sanity check, is this all zeroes? */ - if (memchr_inv(data, 0x00, 13)) { + if (memchr_inv(data, 0x00, 13) == NULL) { if (!(data[13] & BIT(7))) dev_warn(yas5xx->dev, "calibration is blank!\n"); } -- cgit v1.2.3 From f8ef475aa069cd72e9e7bdb2d60dc6a89e2bafad Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Mon, 9 May 2022 07:24:05 +0000 Subject: iio: adc: xilinx-ams: fix return error variable Return irq instead of ret which always equals to zero here. Fixes: d5c70627a794 ("iio: adc: Add Xilinx AMS driver") Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Michal Simek Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-ams.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index a55396c1f8b2..a7687706012d 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c @@ -1409,7 +1409,7 @@ static int ams_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return ret; + return irq; ret = devm_request_irq(&pdev->dev, irq, &ams_irq, 0, "ams-irq", indio_dev); -- cgit v1.2.3 From 036d20726c30267724416e966c9f92db07de8081 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 31 May 2022 13:08:56 -0700 Subject: drm/msm: Ensure mmap offset is initialized If a GEM object is allocated, and then exported as a dma-buf fd which is mmap'd before or without the GEM buffer being directly mmap'd, the vma_node could be unitialized. This leads to a situation where the CPU mapping is not correctly torn down in drm_vma_node_unmap(). Fixes: e5516553999f ("drm: call drm_gem_object_funcs.mmap with fake offset") Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220531200857.136547-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gem_prime.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 44485363f37a..14ab9a627d8b 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -964,7 +964,7 @@ static const struct drm_driver msm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_mmap = msm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index fdbaad53eb84..34f74d47b3c1 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -246,6 +246,7 @@ unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_t void msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev); +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 94ab705e9b8a..dcc8a573bc76 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -11,6 +11,21 @@ #include "msm_drv.h" #include "msm_gem.h" +int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + /* Ensure the mmap offset is initialized. We lazily initialize it, + * so if it has not been first mmap'd directly as a GEM object, the + * mmap offset will not be already initialized. + */ + ret = drm_gem_create_mmap_offset(obj); + if (ret) + return ret; + + return drm_gem_prime_mmap(obj, vma); +} + struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); -- cgit v1.2.3 From f84d83d8165570380f55f4ce578bfb131a9266c5 Mon Sep 17 00:00:00 2001 From: David Virag Date: Thu, 26 May 2022 07:58:40 +0200 Subject: arm64: dts: exynos: Correct UART clocks on Exynos7885 The clocks in the serial UART nodes were swapped by mistake on Exynos7885. This only worked correctly because of a mistake in the clock driver which has been fixed. With the fixed clock driver in place, the baudrate of the UARTs get miscalculated. Fix this by correcting the clocks in the dtsi. Fixes: 06874015327b ("arm64: dts: exynos: Add initial device tree support for Exynos7885 SoC") Signed-off-by: David Virag Link: https://lore.kernel.org/r/20220526055840.45209-3-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- arch/arm64/boot/dts/exynos/exynos7885.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/exynos/exynos7885.dtsi b/arch/arm64/boot/dts/exynos/exynos7885.dtsi index 3170661f5b67..9c233c56558c 100644 --- a/arch/arm64/boot/dts/exynos/exynos7885.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7885.dtsi @@ -280,8 +280,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart0_bus>; - clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART0_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>, + <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <64>; status = "disabled"; @@ -293,8 +293,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart1_bus>; - clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART1_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>, + <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; @@ -306,8 +306,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&uart2_bus>; - clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART2_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>, + <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; -- cgit v1.2.3 From c4c79525042a4a7df96b73477feaf232fe44ae81 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 23 May 2022 18:55:13 +0400 Subject: ARM: exynos: Fix refcount leak in exynos_map_pmu of_find_matching_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. of_node_put() checks null pointer. Fixes: fce9e5bb2526 ("ARM: EXYNOS: Add support for mapping PMU base address via DT") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220523145513.12341-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski --- arch/arm/mach-exynos/exynos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 8b48326be9fd..51a247ca4da8 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -149,6 +149,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) -- cgit v1.2.3 From 67c7fc6cd915d809be4de2eed323aa5f2205c52f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 May 2022 11:29:13 +0200 Subject: memory: omap-gpmc: OMAP_GPMC should depend on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 The Texas Instruments OMAP General Purpose Memory Controller (GPMC) is only present on TI OMAP2/3/4/5, Keystone, AM33xx, AM43x, DRA7xx, TI81xx, and K3 SoCs. Hence add a dependency on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3, to prevent asking the user about this driver when configuring a kernel without OMAP2+, Keystone, or K3 SoC family support. Fixes: be34f45f0d4aa91c ("memory: omap-gpmc: Make OMAP_GPMC config visible and selectable") Signed-off-by: Geert Uytterhoeven Acked-by: Roger Quadros Link: https://lore.kernel.org/r/f6780f572f882ed6ab5934321942cf2b412bf8d1.1652174849.git.geert+renesas@glider.be Signed-off-by: Krzysztof Kozlowski --- drivers/memory/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig index b7800b37af78..ac1a411648d8 100644 --- a/drivers/memory/Kconfig +++ b/drivers/memory/Kconfig @@ -105,6 +105,7 @@ config TI_EMIF config OMAP_GPMC tristate "Texas Instruments OMAP SoC GPMC driver" depends on OF_ADDRESS + depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST select GPIOLIB help This driver is for the General Purpose Memory Controller (GPMC) -- cgit v1.2.3 From 038ae37c510fd57cbc543ac82db1e7b23b28557a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 1 Jun 2022 16:01:18 +0400 Subject: memory: mtk-smi: add missing put_device() call in mtk_smi_device_link_common The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the error handling paths. Fixes: 47404757702e ("memory: mtk-smi: Add device link for smi-sub-common") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220601120118.60225-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/mtk-smi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 86a3d34f418e..4c5154e0bf00 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -404,13 +404,16 @@ static int mtk_smi_device_link_common(struct device *dev, struct device **com_de of_node_put(smi_com_node); if (smi_com_pdev) { /* smi common is the supplier, Make sure it is ready before */ - if (!platform_get_drvdata(smi_com_pdev)) + if (!platform_get_drvdata(smi_com_pdev)) { + put_device(&smi_com_pdev->dev); return -EPROBE_DEFER; + } smi_com_dev = &smi_com_pdev->dev; link = device_link_add(dev, smi_com_dev, DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); if (!link) { dev_err(dev, "Unable to link smi-common dev\n"); + put_device(&smi_com_pdev->dev); return -ENODEV; } *com_dev = smi_com_dev; -- cgit v1.2.3 From 1332661e09304b7b8e84e5edc11811ba08d12abe Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 2 Jun 2022 08:17:21 +0400 Subject: memory: samsung: exynos5422-dmc: Fix refcount leak in of_get_dram_timings of_parse_phandle() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. This function doesn't call of_node_put() in some error paths. To unify the structure, Add put_node label and goto it on errors. Fixes: 6e7674c3c6df ("memory: Add DMC driver for Exynos5422") Signed-off-by: Miaoqian Lin Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20220602041721.64348-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/samsung/exynos5422-dmc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c index 4733e7898ffe..c491cd549644 100644 --- a/drivers/memory/samsung/exynos5422-dmc.c +++ b/drivers/memory/samsung/exynos5422-dmc.c @@ -1187,33 +1187,39 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) dmc->timing_row = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_row) - return -ENOMEM; + if (!dmc->timing_row) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_data = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_data) - return -ENOMEM; + if (!dmc->timing_data) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_power = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_power) - return -ENOMEM; + if (!dmc->timing_power) { + ret = -ENOMEM; + goto put_node; + } dmc->timings = of_lpddr3_get_ddr_timings(np_ddr, dmc->dev, DDR_TYPE_LPDDR3, &dmc->timings_arr_size); if (!dmc->timings) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get timings from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } dmc->min_tck = of_lpddr3_get_min_tck(np_ddr, dmc->dev); if (!dmc->min_tck) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get tck from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } /* Sorted array of OPPs with frequency ascending */ @@ -1227,13 +1233,14 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) clk_period_ps); } - of_node_put(np_ddr); /* Take the highest frequency's timings as 'bypass' */ dmc->bypass_timing_row = dmc->timing_row[idx - 1]; dmc->bypass_timing_data = dmc->timing_data[idx - 1]; dmc->bypass_timing_power = dmc->timing_power[idx - 1]; +put_node: + of_node_put(np_ddr); return ret; } -- cgit v1.2.3 From 21b511ddee09a78909035ec47a6a594349fe3296 Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Mon, 6 Jun 2022 11:55:25 +0530 Subject: spi: spi-cadence: Fix SPI CS gets toggling sporadically As part of unprepare_transfer_hardware, SPI controller will be disabled which will indirectly deassert the CS line. This will create a problem in some of the devices where message will be transferred with cs_change flag set(CS should not be deasserted). As per SPI controller implementation, if SPI controller is disabled then all output enables are inactive and all pins are set to input mode which means CS will go to default state high(deassert). This leads to an issue when core explicitly ask not to deassert the CS (cs_change = 1). This patch fix the above issue by checking the Slave select status bits from configuration register before disabling the SPI. Signed-off-by: Sai Krishna Potthuri Signed-off-by: Amit Kumar Mahapatra Link: https://lore.kernel.org/r/20220606062525.18447-1-amit.kumar-mahapatra@xilinx.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index a23d4f6329f5..00f0d1b3a722 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -69,6 +69,7 @@ #define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */ #define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */ #define CDNS_SPI_SS0 0x1 /* Slave Select zero */ +#define CDNS_SPI_NOSS 0x3C /* No Slave select */ /* * SPI Interrupt Registers bit Masks @@ -450,15 +451,20 @@ static int cdns_prepare_transfer_hardware(struct spi_master *master) * @master: Pointer to the spi_master structure which provides * information about the controller. * - * This function disables the SPI master controller. + * This function disables the SPI master controller when no slave selected. * * Return: 0 always */ static int cdns_unprepare_transfer_hardware(struct spi_master *master) { struct cdns_spi *xspi = spi_master_get_devdata(master); + u32 ctrl_reg; - cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); + /* Disable the SPI if slave is deselected */ + ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR); + ctrl_reg = (ctrl_reg & CDNS_SPI_CR_SSCTRL) >> CDNS_SPI_SS_SHIFT; + if (ctrl_reg == CDNS_SPI_NOSS) + cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); return 0; } -- cgit v1.2.3 From 7b40322f7183a92c4303457528ae7cda571c60b9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 27 May 2022 11:11:43 +0200 Subject: spi: cadence: Detect transmit FIFO depth The depth of the transmit FIFO for the Cadence SPI controller is currently hardcoded to 128. But the depth is a synthesis configuration parameter of the core and can vary between different SoCs. If the configured FIFO size is less than 128 the driver will busy loop in the cdns_spi_fill_tx_fifo() function waiting for FIFO space to become available. Depending on the length and speed of the transfer it can spin for a significant amount of time. The cdns_spi_fill_tx_fifo() function is called from the drivers interrupt handler, so it can leave interrupts disabled for a prolonged amount of time. In addition the read FIFO will also overflow and data will be discarded. To avoid this detect the actual size of the FIFO and use that rather than the hardcoded value. To detect the FIFO size the FIFO threshold register is used. The register is sized so that it can hold FIFO size - 1 as its maximum value. Bits that are not needed to hold the threshold value will always read 0. By writing 0xffff to the register and then reading back the value in the register we get the FIFO size. Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20220527091143.3780378-1-lars@metafoo.de Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 00f0d1b3a722..31d778e9d255 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -93,9 +93,6 @@ #define CDNS_SPI_ER_ENABLE 0x00000001 /* SPI Enable Bit Mask */ #define CDNS_SPI_ER_DISABLE 0x0 /* SPI Disable Bit Mask */ -/* SPI FIFO depth in bytes */ -#define CDNS_SPI_FIFO_DEPTH 128 - /* Default number of chip select lines */ #define CDNS_SPI_DEFAULT_NUM_CS 4 @@ -111,6 +108,7 @@ * @rx_bytes: Number of bytes requested * @dev_busy: Device busy flag * @is_decoded_cs: Flag for decoder property set or not + * @tx_fifo_depth: Depth of the TX FIFO */ struct cdns_spi { void __iomem *regs; @@ -124,6 +122,7 @@ struct cdns_spi { int rx_bytes; u8 dev_busy; u32 is_decoded_cs; + unsigned int tx_fifo_depth; }; /* Macros for the SPI controller read/write */ @@ -305,7 +304,7 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi) { unsigned long trans_cnt = 0; - while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) && + while ((trans_cnt < xspi->tx_fifo_depth) && (xspi->tx_bytes > 0)) { /* When xspi in busy condition, bytes may send failed, @@ -469,6 +468,24 @@ static int cdns_unprepare_transfer_hardware(struct spi_master *master) return 0; } +/** + * cdns_spi_detect_fifo_depth - Detect the FIFO depth of the hardware + * @xspi: Pointer to the cdns_spi structure + * + * The depth of the TX FIFO is a synthesis configuration parameter of the SPI + * IP. The FIFO threshold register is sized so that its maximum value can be the + * FIFO size - 1. This is used to detect the size of the FIFO. + */ +static void cdns_spi_detect_fifo_depth(struct cdns_spi *xspi) +{ + /* The MSBs will get truncated giving us the size of the FIFO */ + cdns_spi_write(xspi, CDNS_SPI_THLD, 0xffff); + xspi->tx_fifo_depth = cdns_spi_read(xspi, CDNS_SPI_THLD) + 1; + + /* Reset to default */ + cdns_spi_write(xspi, CDNS_SPI_THLD, 0x1); +} + /** * cdns_spi_probe - Probe method for the SPI driver * @pdev: Pointer to the platform_device structure @@ -541,6 +558,8 @@ static int cdns_spi_probe(struct platform_device *pdev) if (ret < 0) xspi->is_decoded_cs = 0; + cdns_spi_detect_fifo_depth(xspi); + /* SPI controller initializations */ cdns_spi_init_hw(xspi); -- cgit v1.2.3 From 2283679f4c468df367830b7eb8f22d48a6940e19 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 2 Jun 2022 11:10:22 +0200 Subject: spi: spi-mem: Fix spi_mem_poll_status() In spi_mem_exec_op(), in case cs_gpiod descriptor is set, exec_op() callback can't be used. The same must be applied in spi_mem_poll_status(), poll_status() callback can't be used, we must use the legacy path using read_poll_timeout(). Tested on STM32mp257c-ev1 specific evaluation board on which a spi-nand was mounted instead of a spi-nor. Signed-off-by: Patrice Chotard Tested-by: Patrice Chotard Link: https://lore.kernel.org/r/20220602091022.358127-1-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index e8de4f5017cd..0c79193d9697 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -808,7 +808,7 @@ int spi_mem_poll_status(struct spi_mem *mem, op->data.dir != SPI_MEM_DATA_IN) return -EINVAL; - if (ctlr->mem_ops && ctlr->mem_ops->poll_status) { + if (ctlr->mem_ops && ctlr->mem_ops->poll_status && !mem->spi->cs_gpiod) { ret = spi_mem_access_start(mem); if (ret) return ret; -- cgit v1.2.3 From 6aa27071e4354c351d98e345fc888b70f335f185 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 25 May 2022 20:41:41 -0500 Subject: spi: dt-bindings: Fix unevaluatedProperties warnings in examples The 'unevaluatedProperties' schema checks is not fully working and doesn't catch some cases where there's a $ref to another schema. A fix is pending, but results in new warnings in examples. 'spi-max-frequency' is supposed to be a per SPI peripheral device property, not a SPI controller property, so drop it. Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20220526014141.2872567-1-robh@kernel.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml | 1 - Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml index ece261b8e963..7326c0a28d16 100644 --- a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml +++ b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml @@ -47,6 +47,5 @@ examples: clocks = <&clkcfg CLK_SPI0>; interrupt-parent = <&plic>; interrupts = <54>; - spi-max-frequency = <25000000>; }; ... diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml index e2c7b934c50d..78ceb9d67754 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml @@ -110,7 +110,6 @@ examples: pinctrl-names = "default"; pinctrl-0 = <&qup_spi1_default>; interrupts = ; - spi-max-frequency = <50000000>; #address-cells = <1>; #size-cells = <0>; }; -- cgit v1.2.3 From 122839b58a089ff7f231759e2c8f63790724cae2 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 23 May 2022 18:15:59 +0100 Subject: firmware: arm_scmi: Relax base protocol sanity checks on the protocol list Even though malformed replies from firmware must be treated carefully to avoid memory corruption in the kernel, some out-of-spec SCMI replies can be tolerated to avoid breaking existing deployed system, as long as they won't cause memory issues. Relax the sanity checks on the recieved protocol list in the base protocol to avoid breaking one of the deployed platform whose firmware is not easily upgradable currently. Link: https://lore.kernel.org/r/20220523171559.472112-1-cristian.marussi@arm.com Cc: Etienne Carriere Cc: Sudeep Holla Reported-by: Nicolas Frattaroli Tested-By: Frank Wunderlich Acked-by: Michael Riesch Acked-by: Etienne Carriere Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/base.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index 20fba7370f4e..d0ac96da1ddf 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -221,11 +221,17 @@ scmi_base_implementation_list_get(const struct scmi_protocol_handle *ph, calc_list_sz = (1 + (loop_num_ret - 1) / sizeof(u32)) * sizeof(u32); if (calc_list_sz != real_list_sz) { - dev_err(dev, - "Malformed reply - real_sz:%zd calc_sz:%u\n", - real_list_sz, calc_list_sz); - ret = -EPROTO; - break; + dev_warn(dev, + "Malformed reply - real_sz:%zd calc_sz:%u (loop_num_ret:%d)\n", + real_list_sz, calc_list_sz, loop_num_ret); + /* + * Bail out if the expected list size is bigger than the + * total payload size of the received reply. + */ + if (calc_list_sz > real_list_sz) { + ret = -EPROTO; + break; + } } for (loop = 0; loop < loop_num_ret; loop++) -- cgit v1.2.3 From d0c94bef70e71e364c0a016b0e92307cd4d1d719 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 30 May 2022 12:52:36 +0100 Subject: firmware: arm_scmi: Remove all the unused local variables While using SCMI iterators helpers a few local automatic variables are defined but then used only as input for sizeof operators. cppcheck is fooled to complain about this with: | drivers/firmware/arm_scmi/sensors.c:341:48: warning: Variable 'msg' is | not assigned a value. [unassignedVariable] | struct scmi_msg_sensor_list_update_intervals *msg; Even though this is an innocuos warning, since the uninitialized variable is at the end never used in the reported cases, fix these occurences all over SCMI stack to avoid keeping unneeded objects on the stack. Link: https://lore.kernel.org/r/20220530115237.277077-1-cristian.marussi@arm.com Cc: Dan Carpenter Cc: Sudeep Holla Reported-by: kernel test robot Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 5 ++--- drivers/firmware/arm_scmi/perf.c | 4 ++-- drivers/firmware/arm_scmi/sensors.c | 12 ++++++------ drivers/firmware/arm_scmi/voltage.c | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 4d36a9a133d1..1a718faa4192 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -266,9 +266,7 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, struct scmi_clock_info *clk) { int ret; - void *iter; - struct scmi_msg_clock_describe_rates *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_clk_describe_prepare_message, .update_state = iter_clk_describe_update_state, @@ -281,7 +279,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, iter = ph->hops->iter_response_init(ph, &ops, SCMI_MAX_NUM_RATES, CLOCK_DESCRIBE_RATES, - sizeof(*msg), &cpriv); + sizeof(struct scmi_msg_clock_describe_rates), + &cpriv); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 8f4051aca220..c1f701623058 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -332,7 +332,6 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, u32 domain, { int ret; void *iter; - struct scmi_msg_perf_describe_levels *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_perf_levels_prepare_message, .update_state = iter_perf_levels_update_state, @@ -345,7 +344,8 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, u32 domain, iter = ph->hops->iter_response_init(ph, &ops, MAX_OPPS, PERF_DESCRIBE_LEVELS, - sizeof(*msg), &ppriv); + sizeof(struct scmi_msg_perf_describe_levels), + &ppriv); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 21e0ce89b153..75b9d716508e 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -338,7 +338,6 @@ static int scmi_sensor_update_intervals(const struct scmi_protocol_handle *ph, struct scmi_sensor_info *s) { void *iter; - struct scmi_msg_sensor_list_update_intervals *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_intervals_prepare_message, .update_state = iter_intervals_update_state, @@ -351,7 +350,8 @@ static int scmi_sensor_update_intervals(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->intervals.count, SENSOR_LIST_UPDATE_INTERVALS, - sizeof(*msg), &upriv); + sizeof(struct scmi_msg_sensor_list_update_intervals), + &upriv); if (IS_ERR(iter)) return PTR_ERR(iter); @@ -459,7 +459,6 @@ scmi_sensor_axis_extended_names_get(const struct scmi_protocol_handle *ph, struct scmi_sensor_info *s) { void *iter; - struct scmi_msg_sensor_axis_description_get *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_axes_desc_prepare_message, .update_state = iter_axes_extended_name_update_state, @@ -468,7 +467,8 @@ scmi_sensor_axis_extended_names_get(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_NAME_GET, - sizeof(*msg), s); + sizeof(struct scmi_msg_sensor_axis_description_get), + s); if (IS_ERR(iter)) return PTR_ERR(iter); @@ -481,7 +481,6 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, { int ret; void *iter; - struct scmi_msg_sensor_axis_description_get *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_axes_desc_prepare_message, .update_state = iter_axes_desc_update_state, @@ -495,7 +494,8 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_DESCRIPTION_GET, - sizeof(*msg), s); + sizeof(struct scmi_msg_sensor_axis_description_get), + s); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c index 9d195d8719ab..97df6d3dd131 100644 --- a/drivers/firmware/arm_scmi/voltage.c +++ b/drivers/firmware/arm_scmi/voltage.c @@ -180,7 +180,6 @@ static int scmi_voltage_levels_get(const struct scmi_protocol_handle *ph, { int ret; void *iter; - struct scmi_msg_cmd_describe_levels *msg; struct scmi_iterator_ops ops = { .prepare_message = iter_volt_levels_prepare_message, .update_state = iter_volt_levels_update_state, @@ -193,7 +192,8 @@ static int scmi_voltage_levels_get(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, v->num_levels, VOLTAGE_DESCRIBE_LEVELS, - sizeof(*msg), &vpriv); + sizeof(struct scmi_msg_cmd_describe_levels), + &vpriv); if (IS_ERR(iter)) return PTR_ERR(iter); -- cgit v1.2.3 From 4266e2f70d4388b8c6a95056169954ff049ced94 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 14 May 2022 11:35:05 -0300 Subject: arm64: s32g2: Pass unit name to soc node Pass unit name to soc node to fix the following W=1 build warning: arch/arm64/boot/dts/freescale/s32g2.dtsi:82.6-123.4: Warning (unit_address_vs_reg): /soc: node has a reg or ranges property, but no unit name Signed-off-by: Fabio Estevam Reviewed-by: Chester Lin Signed-off-by: Chester Lin Link: https://lore.kernel.org/r/20220514143505.1554813-1-festevam@gmail.com --- arch/arm64/boot/dts/freescale/s32g2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/s32g2.dtsi b/arch/arm64/boot/dts/freescale/s32g2.dtsi index 59ea8a25aa4c..824d401e7a2c 100644 --- a/arch/arm64/boot/dts/freescale/s32g2.dtsi +++ b/arch/arm64/boot/dts/freescale/s32g2.dtsi @@ -79,7 +79,7 @@ }; }; - soc { + soc@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 680c0aee97690c3f595e074a5f677599aac5d26b Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Wed, 25 May 2022 09:49:22 +0800 Subject: MAINTAINERS: add a new reviewer for S32G Add the NXP S32 Linux team as a designated review group of s32g. Signed-off-by: Chester Lin Link: https://lore.kernel.org/r/20220525161422.14156-1-clin@suse.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a6d3bd9d2a8d..fbe76ce119ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2467,6 +2467,7 @@ ARM/NXP S32G ARCHITECTURE M: Chester Lin R: Andreas Färber R: Matthias Brugger +R: NXP S32 Linux Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm64/boot/dts/freescale/s32g*.dts* -- cgit v1.2.3 From 118f767413ada4eef7825fbd4af7c0866f883441 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 25 May 2022 16:20:29 +0300 Subject: RDMA/qedr: Fix reporting QP timeout attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to save the passed QP timeout attribute when the QP gets modified, so when calling query QP the right value is reported and not the converted value that is required by the firmware. This issue was found while running the pyverbs tests. Fixes: cecbcddf6461 ("qedr: Add support for QP verbs") Link: https://lore.kernel.org/r/20220525132029.84813-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Michal Kalderon  Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/verbs.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8def88cfa300..db9ef3e1eb97 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -418,6 +418,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f0f43b6db89e..03ed7c0fae50 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2613,6 +2613,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2772,7 +2774,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; -- cgit v1.2.3 From 1d0811b03eb30b2f0793acaa96c6ce90b8b9c87a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Jun 2022 12:57:58 +0200 Subject: parisc/stifb: Fix fb_is_primary_device() only available with CONFIG_FB_STI Fix this build error noticed by the kernel test robot: drivers/video/console/sticore.c:1132:5: error: redefinition of 'fb_is_primary_device' arch/parisc/include/asm/fb.h:18:19: note: previous definition of 'fb_is_primary_device' Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: stable@vger.kernel.org # v5.10+ --- arch/parisc/include/asm/fb.h | 2 +- drivers/video/console/sticore.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index d63a2acb91f2..55d29c4f716e 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } -#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +#if defined(CONFIG_FB_STI) int fb_is_primary_device(struct fb_info *info); #else static inline int fb_is_primary_device(struct fb_info *info) diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index fa23bf0247b0..bd4dc97d4d34 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -1148,6 +1148,7 @@ int sti_call(const struct sti_struct *sti, unsigned long func, return ret; } +#if defined(CONFIG_FB_STI) /* check if given fb_info is the primary device */ int fb_is_primary_device(struct fb_info *info) { @@ -1163,6 +1164,7 @@ int fb_is_primary_device(struct fb_info *info) return (sti->info == info); } EXPORT_SYMBOL(fb_is_primary_device); +#endif MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer"); MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines"); -- cgit v1.2.3 From 122e951eb8045338089b086c8bd9b0b9afb04a92 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Sat, 4 Jun 2022 21:33:00 +0200 Subject: regulator: qcom_smd: correct MP5496 ranges Currently set MP5496 Buck and LDO ranges dont match its datasheet[1]. According to the datasheet: Buck range is 0.6-2.1875V with a 12.5mV step LDO range is 0.8-3.975V with a 25mV step. So, correct the ranges according to the datasheet[1]. [1] https://www.monolithicpower.com/en/documentview/productdocument/index/version/2/document_type/Datasheet/lang/en/sku/MP5496GR/document_id/6906/ Signed-off-by: Robert Marko Link: https://lore.kernel.org/r/20220604193300.125758-2-robimarko@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 7dff94a2eb7e..ef6e47d025ca 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -723,19 +723,19 @@ static const struct regulator_desc pms405_pldo600 = { static const struct regulator_desc mp5496_smpa2 = { .linear_ranges = (struct linear_range[]) { - REGULATOR_LINEAR_RANGE(725000, 0, 27, 12500), + REGULATOR_LINEAR_RANGE(600000, 0, 127, 12500), }, .n_linear_ranges = 1, - .n_voltages = 28, + .n_voltages = 128, .ops = &rpm_mp5496_ops, }; static const struct regulator_desc mp5496_ldoa2 = { .linear_ranges = (struct linear_range[]) { - REGULATOR_LINEAR_RANGE(1800000, 0, 60, 25000), + REGULATOR_LINEAR_RANGE(800000, 0, 127, 25000), }, .n_linear_ranges = 1, - .n_voltages = 61, + .n_voltages = 128, .ops = &rpm_mp5496_ops, }; -- cgit v1.2.3 From ce0db505bc0c51ef5e9ba446c660de7e26f78f29 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Mon, 6 Jun 2022 23:13:05 +0200 Subject: drm/msm: Fix double pm_runtime_disable() call Following commit 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}"), any call to adreno_unbind() will disable runtime PM twice, as indicated by the call trees below: adreno_unbind() -> pm_runtime_force_suspend() -> pm_runtime_disable() adreno_unbind() -> gpu->funcs->destroy() [= aNxx_destroy()] -> adreno_gpu_cleanup() -> pm_runtime_disable() Note that pm_runtime_force_suspend() is called right before gpu->funcs->destroy() and both functions are called unconditionally. With recent addition of the eDP AUX bus code, this problem manifests itself when the eDP panel cannot be found yet and probing is deferred. On the first probe attempt, we disable runtime PM twice as described above. This then causes any later probe attempt to fail with [drm:adreno_load_gpu [msm]] *ERROR* Couldn't power up the GPU: -13 preventing the driver from loading. As there seem to be scenarios where the aNxx_destroy() functions are not called from adreno_unbind(), simply removing pm_runtime_disable() from inside adreno_unbind() does not seem to be the proper fix. This is what commit 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}") intended to fix. Therefore, instead check whether runtime PM is still enabled, and only disable it in that case. Fixes: 17e822f7591f ("drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup}") Signed-off-by: Maximilian Luz Tested-by: Bjorn Andersson Reviewed-by: Rob Clark Link: https://lore.kernel.org/r/20220606211305.189585-1-luzmaximilian@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 4e665c806a14..f944b69e2a25 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -1057,7 +1057,8 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) release_firmware(adreno_gpu->fw[i]); - pm_runtime_disable(&priv->gpu_pdev->dev); + if (pm_runtime_enabled(&priv->gpu_pdev->dev)) + pm_runtime_disable(&priv->gpu_pdev->dev); msm_gpu_cleanup(&adreno_gpu->base); } -- cgit v1.2.3 From 46d6e11320d21dc40fce229ab3504125847de27e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 3 Jun 2022 09:30:12 +0200 Subject: MAINTAINERS: Update BCM2711/BCM2835 maintainer I haven't been able to find time to maintain BCM2711/BCM2835 these last months, so it's only fair to pass the baton to Florian who's been doing the work. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Florian Fainelli --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a6d3bd9d2a8d..320a2ac788dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3789,12 +3789,12 @@ N: bcmbca N: bcm[9]?47622 BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE -M: Nicolas Saenz Julienne +M: Florian Fainelli R: Broadcom internal kernel review list L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git +T: git git://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: drivers/pci/controller/pcie-brcmstb.c F: drivers/staging/vc04_services -- cgit v1.2.3 From 37d838de369b07b596c19ff3662bf0293fdb09ee Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 26 May 2022 11:53:22 +0400 Subject: soc: bcm: brcmstb: pm: pm-arm: Fix refcount leak in brcmstb_pm_probe of_find_matching_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. In brcmstb_init_sram, it pass dn to of_address_to_resource(), of_address_to_resource() will call of_find_device_by_node() to take reference, so we should release the reference returned by of_find_matching_node(). Fixes: 0b741b8234c8 ("soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM)") Signed-off-by: Miaoqian Lin Reviewed-by: Andy Shevchenko Signed-off-by: Florian Fainelli --- drivers/soc/bcm/brcmstb/pm/pm-arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c index 3cbb165d6e30..70ad0f3dce28 100644 --- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c +++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c @@ -783,6 +783,7 @@ static int brcmstb_pm_probe(struct platform_device *pdev) } ret = brcmstb_init_sram(dn); + of_node_put(dn); if (ret) { pr_err("error setting up SRAM for PM\n"); return ret; -- cgit v1.2.3 From 204e6ceaa1035cb7b92b156517e88842ebb4c7ff Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Wed, 8 Jun 2022 00:05:21 +0900 Subject: exfat: use updated exfat_chain directly during renaming In order for a file to access its own directory entry set, exfat_inode_info(ei) has two copied values. One is ei->dir, which is a snapshot of exfat_chain of the parent directory, and the other is ei->entry, which is the offset of the start of the directory entry set in the parent directory. Since the parent directory can be updated after the snapshot point, it should be used only for accessing one's own directory entry set. However, as of now, during renaming, it could try to traverse or to allocate clusters via snapshot values, it does not make sense. This potential problem has been revealed when exfat_update_parent_info() was removed by commit d8dad2588add ("exfat: fix referencing wrong parent directory information after renaming"). However, I don't think it's good idea to bring exfat_update_parent_info() back. Instead, let's use the updated exfat_chain of parent directory diectly. Fixes: d8dad2588add ("exfat: fix referencing wrong parent directory information after renaming") Reported-by: Wang Yugui Signed-off-by: Sungjong Seo Tested-by: Wang Yugui Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 76acc3721951..c6eaf7e9ea74 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1198,7 +1198,9 @@ static int __exfat_rename(struct inode *old_parent_inode, return -ENOENT; } - exfat_chain_dup(&olddir, &ei->dir); + exfat_chain_set(&olddir, EXFAT_I(old_parent_inode)->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(old_parent_inode), sbi), + EXFAT_I(old_parent_inode)->flags); dentry = ei->entry; ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh); -- cgit v1.2.3 From 2c5947cffd81ac8181346efacdca3c777ab330ba Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 7 Jun 2022 20:59:18 +0200 Subject: Revert "mtd: rawnand: add support for Toshiba TC58NVG0S3HTA00 NAND flash" This reverts commit 3380557fc7e28d9bce7607e16d98f123d36da4ca. It turned out this "4-byte" ID might have been an honest mistake. Regrettably, the chip Andreas has might be a counterfeit or is damaged in some other way and shouldn't have ended up in a router. Andreas reported his chip is returning just four bytes: "98 f1 80 15 00 00 00 00". However, according to Kioxia/Toshiba's datasheet, there should have been at least another byte that would have contained the correct OOB size that Andreas needed. Miquel and Andreas are both favoring reverting the patch over further, possibly hacky modifications: "[Reverting] is the safest option here. Apart from this device, we do not know how many devices have these damaged/counterfeit chips. If it is just a couple and only on Fritzboxes, as suggested in the Github issue the patch could be carried through OpenWrt[...]" Thanks to several users on the openwrt forum and github issue, who stayed along for the ride: - Peter-vdL for reporting the issue and testing patches. - neg2led and Hannu Nyman who did all the datasheet digging and debugging. Cc: Andreas Boehler Suggested-by: Andreas Boehler Suggested-by: Miquel Raynal Link: https://github.com/openwrt/openwrt/issues/9962 Signed-off-by: Christian Lamparter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220607185918.1048204-1-chunkeey@gmail.com --- drivers/mtd/nand/raw/nand_ids.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_ids.c b/drivers/mtd/nand/raw/nand_ids.c index 88c2440b47d8..dacc5529b3df 100644 --- a/drivers/mtd/nand/raw/nand_ids.c +++ b/drivers/mtd/nand/raw/nand_ids.c @@ -29,9 +29,6 @@ struct nand_flash_dev nand_flash_ids[] = { {"TC58NVG0S3E 1G 3.3V 8-bit", { .id = {0x98, 0xd1, 0x90, 0x15, 0x76, 0x14, 0x01, 0x00} }, SZ_2K, SZ_128, SZ_128K, 0, 8, 64, NAND_ECC_INFO(1, SZ_512), }, - {"TC58NVG0S3HTA00 1G 3.3V 8-bit", - { .id = {0x98, 0xf1, 0x80, 0x15} }, - SZ_2K, SZ_128, SZ_128K, 0, 4, 128, NAND_ECC_INFO(8, SZ_512), }, {"TC58NVG2S0F 4G 3.3V 8-bit", { .id = {0x98, 0xdc, 0x90, 0x26, 0x76, 0x15, 0x01, 0x08} }, SZ_4K, SZ_512, SZ_256K, 0, 8, 224, NAND_ECC_INFO(4, SZ_512) }, -- cgit v1.2.3 From 552ca27929ab28b341ae9b2629f0de3a84c98ee8 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 10 May 2022 07:46:12 +0200 Subject: ARM: dts: imx7: Move hsic_phy power domain to HSIC PHY node Move the power domain to its actual user. This keeps the power domain enabled even when the USB host is runtime suspended. This is necessary to detect any downstream events, like device attach. Fixes: 02f8eb40ef7b ("ARM: dts: imx7s: Add power domain for imx7d HSIC") Suggested-by: Jun Li Signed-off-by: Alexander Stein Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 008e3da460f1..039eed79d2e7 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -120,6 +120,7 @@ compatible = "usb-nop-xceiv"; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clock-names = "main_clk"; + power-domains = <&pgc_hsic_phy>; #phy-cells = <0>; }; @@ -1153,7 +1154,6 @@ compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; reg = <0x30b30000 0x200>; interrupts = ; - power-domains = <&pgc_hsic_phy>; clocks = <&clks IMX7D_USB_CTRL_CLK>; fsl,usbphy = <&usbphynop3>; fsl,usbmisc = <&usbmisc3 0>; -- cgit v1.2.3 From 8e60294c8012fe4c66c3590376670998902fd822 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 8 Jun 2022 17:40:51 +0100 Subject: firmware: arm_scmi: Fix SENSOR_AXIS_NAME_GET behaviour when unsupported Avoid to invoke SENSOR_AXIS_NAME_GET on sensors that have not declared at least one of their axes as supporting extended names. Since the returned list of axes supporting extended names is not necessarily comprising all the existing axes of the specified sensor, take care also to properly pick the axis descriptor from the ID embedded in the response. Link: https://lore.kernel.org/r/20220608164051.2326087-1-cristian.marussi@arm.com Fixes: 802b0bed011e ("firmware: arm_scmi: Add SCMI v3.1 SENSOR_AXIS_NAME_GET support") Cc: Peter Hilber Cc: Sudeep Holla Reviewed-by: Peter Hilber Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/sensors.c | 56 ++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 75b9d716508e..8a93dd944c49 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -358,15 +358,20 @@ static int scmi_sensor_update_intervals(const struct scmi_protocol_handle *ph, return ph->hops->iter_response_run(iter); } +struct scmi_apriv { + bool any_axes_support_extended_names; + struct scmi_sensor_info *s; +}; + static void iter_axes_desc_prepare_message(void *message, const unsigned int desc_index, const void *priv) { struct scmi_msg_sensor_axis_description_get *msg = message; - const struct scmi_sensor_info *s = priv; + const struct scmi_apriv *apriv = priv; /* Set the number of sensors to be skipped/already read */ - msg->id = cpu_to_le32(s->id); + msg->id = cpu_to_le32(apriv->s->id); msg->axis_desc_index = cpu_to_le32(desc_index); } @@ -393,12 +398,14 @@ iter_axes_desc_process_response(const struct scmi_protocol_handle *ph, u32 attrh, attrl; struct scmi_sensor_axis_info *a; size_t dsize = SCMI_MSG_RESP_AXIS_DESCR_BASE_SZ; - struct scmi_sensor_info *s = priv; + struct scmi_apriv *apriv = priv; const struct scmi_axis_descriptor *adesc = st->priv; attrl = le32_to_cpu(adesc->attributes_low); + if (SUPPORTS_EXTENDED_AXIS_NAMES(attrl)) + apriv->any_axes_support_extended_names = true; - a = &s->axis[st->desc_index + st->loop_idx]; + a = &apriv->s->axis[st->desc_index + st->loop_idx]; a->id = le32_to_cpu(adesc->id); a->extended_attrs = SUPPORTS_EXTEND_ATTRS(attrl); @@ -444,10 +451,19 @@ iter_axes_extended_name_process_response(const struct scmi_protocol_handle *ph, void *priv) { struct scmi_sensor_axis_info *a; - const struct scmi_sensor_info *s = priv; + const struct scmi_apriv *apriv = priv; struct scmi_sensor_axis_name_descriptor *adesc = st->priv; + u32 axis_id = le32_to_cpu(adesc->axis_id); + + if (axis_id >= st->max_resources) + return -EPROTO; - a = &s->axis[st->desc_index + st->loop_idx]; + /* + * Pick the corresponding descriptor based on the axis_id embedded + * in the reply since the list of axes supporting extended names + * can be a subset of all the axes. + */ + a = &apriv->s->axis[axis_id]; strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE); st->priv = ++adesc; @@ -458,21 +474,36 @@ static int scmi_sensor_axis_extended_names_get(const struct scmi_protocol_handle *ph, struct scmi_sensor_info *s) { + int ret; void *iter; struct scmi_iterator_ops ops = { .prepare_message = iter_axes_desc_prepare_message, .update_state = iter_axes_extended_name_update_state, .process_response = iter_axes_extended_name_process_response, }; + struct scmi_apriv apriv = { + .any_axes_support_extended_names = false, + .s = s, + }; iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_NAME_GET, sizeof(struct scmi_msg_sensor_axis_description_get), - s); + &apriv); if (IS_ERR(iter)) return PTR_ERR(iter); - return ph->hops->iter_response_run(iter); + /* + * Do not cause whole protocol initialization failure when failing to + * get extended names for axes. + */ + ret = ph->hops->iter_response_run(iter); + if (ret) + dev_warn(ph->dev, + "Failed to get axes extended names for %s (ret:%d).\n", + s->name, ret); + + return 0; } static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, @@ -486,6 +517,10 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, .update_state = iter_axes_desc_update_state, .process_response = iter_axes_desc_process_response, }; + struct scmi_apriv apriv = { + .any_axes_support_extended_names = false, + .s = s, + }; s->axis = devm_kcalloc(ph->dev, s->num_axis, sizeof(*s->axis), GFP_KERNEL); @@ -495,7 +530,7 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, iter = ph->hops->iter_response_init(ph, &ops, s->num_axis, SENSOR_AXIS_DESCRIPTION_GET, sizeof(struct scmi_msg_sensor_axis_description_get), - s); + &apriv); if (IS_ERR(iter)) return PTR_ERR(iter); @@ -503,7 +538,8 @@ static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph, if (ret) return ret; - if (PROTOCOL_REV_MAJOR(version) >= 0x3) + if (PROTOCOL_REV_MAJOR(version) >= 0x3 && + apriv.any_axes_support_extended_names) ret = scmi_sensor_axis_extended_names_get(ph, s); return ret; -- cgit v1.2.3 From 4314f9f4f85832b5082f4e38b07b63b11baa538c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 8 Jun 2022 10:55:28 +0100 Subject: firmware: arm_scmi: Avoid using extended string-buffers sizes if not necessary Commit b260fccaebdc2 ("firmware: arm_scmi: Add SCMI v3.1 protocol extended names support") moved all the name string buffers to use the extended buffer size of 64 instead of the required 16 bytes. While that should be fine if the firmware terminates the string before 16 bytes, there is possibility of copying random data if the name is not NULL terminated by the firmware. SCMI base protocol agent_name/vendor_id/sub_vendor_id are defined by the specification as NULL-terminated ASCII strings up to 16-bytes in length. The underlying buffers and message descriptors are currently bigger than needed; resize them to fit only the strictly needed 16 bytes to avoid any possible leaks when reading data from the firmware. Change the size argument of strlcpy to use SCMI_SHORT_NAME_MAX_SIZE always when dealing with short domain names, so as to limit the possibility that an ill-formed non-NULL terminated short reply from the SCMI platform firmware can leak stale content laying in the underlying transport shared memory area. While at that, convert all strings handling routines to use the preferred strscpy. Link: https://lore.kernel.org/r/20220608095530.497879-1-cristian.marussi@arm.com Fixes: b260fccaebdc2 ("firmware: arm_scmi: Add SCMI v3.1 protocol extended names support") Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/base.c | 8 ++++---- drivers/firmware/arm_scmi/clock.c | 2 +- drivers/firmware/arm_scmi/perf.c | 2 +- drivers/firmware/arm_scmi/power.c | 2 +- drivers/firmware/arm_scmi/protocols.h | 2 -- drivers/firmware/arm_scmi/reset.c | 2 +- drivers/firmware/arm_scmi/sensors.c | 4 ++-- drivers/firmware/arm_scmi/voltage.c | 2 +- include/linux/scmi_protocol.h | 9 +++++---- 9 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index d0ac96da1ddf..a52f084a6a87 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -36,7 +36,7 @@ struct scmi_msg_resp_base_attributes { struct scmi_msg_resp_base_discover_agent { __le32 agent_id; - u8 name[SCMI_MAX_STR_SIZE]; + u8 name[SCMI_SHORT_NAME_MAX_SIZE]; }; @@ -119,7 +119,7 @@ scmi_base_vendor_id_get(const struct scmi_protocol_handle *ph, bool sub_vendor) ret = ph->xops->do_xfer(ph, t); if (!ret) - memcpy(vendor_id, t->rx.buf, size); + strscpy(vendor_id, t->rx.buf, size); ph->xops->xfer_put(ph, t); @@ -276,7 +276,7 @@ static int scmi_base_discover_agent_get(const struct scmi_protocol_handle *ph, ret = ph->xops->do_xfer(ph, t); if (!ret) { agent_info = t->rx.buf; - strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE); + strscpy(name, agent_info->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); @@ -375,7 +375,7 @@ static int scmi_base_protocol_init(const struct scmi_protocol_handle *ph) int id, ret; u8 *prot_imp; u32 version; - char name[SCMI_MAX_STR_SIZE]; + char name[SCMI_SHORT_NAME_MAX_SIZE]; struct device *dev = ph->dev; struct scmi_revision_info *rev = scmi_revision_area_get(ph); diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 1a718faa4192..c7a83f6e38e5 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -153,7 +153,7 @@ static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph, if (!ret) { u32 latency = 0; attributes = le32_to_cpu(attr->attributes); - strlcpy(clk->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(clk->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); /* clock_enable_latency field is present only since SCMI v3.1 */ if (PROTOCOL_REV_MAJOR(version) >= 0x2) latency = le32_to_cpu(attr->clock_enable_latency); diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index c1f701623058..bbb0331801ff 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -252,7 +252,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = (dom_info->sustained_freq_khz * 1000) / dom_info->sustained_perf_level; - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); diff --git a/drivers/firmware/arm_scmi/power.c b/drivers/firmware/arm_scmi/power.c index 964882cc8747..356e83631664 100644 --- a/drivers/firmware/arm_scmi/power.c +++ b/drivers/firmware/arm_scmi/power.c @@ -122,7 +122,7 @@ scmi_power_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->state_set_notify = SUPPORTS_STATE_SET_NOTIFY(flags); dom_info->state_set_async = SUPPORTS_STATE_SET_ASYNC(flags); dom_info->state_set_sync = SUPPORTS_STATE_SET_SYNC(flags); - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 73304af5ec4a..c679f3fb8718 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -24,8 +24,6 @@ #include -#define SCMI_SHORT_NAME_MAX_SIZE 16 - #define PROTOCOL_REV_MINOR_MASK GENMASK(15, 0) #define PROTOCOL_REV_MAJOR_MASK GENMASK(31, 16) #define PROTOCOL_REV_MAJOR(x) ((u16)(FIELD_GET(PROTOCOL_REV_MAJOR_MASK, (x)))) diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index a420a9102094..673f3eb498f4 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -116,7 +116,7 @@ scmi_reset_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->latency_us = le32_to_cpu(attr->latency); if (dom_info->latency_us == U32_MAX) dom_info->latency_us = 0; - strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); + strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 8a93dd944c49..7288c6117838 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -412,7 +412,7 @@ iter_axes_desc_process_response(const struct scmi_protocol_handle *ph, attrh = le32_to_cpu(adesc->attributes_high); a->scale = S32_EXT(SENSOR_SCALE(attrh)); a->type = SENSOR_TYPE(attrh); - strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE); + strscpy(a->name, adesc->name, SCMI_SHORT_NAME_MAX_SIZE); if (a->extended_attrs) { unsigned int ares = le32_to_cpu(adesc->resolution); @@ -634,7 +634,7 @@ iter_sens_descr_process_response(const struct scmi_protocol_handle *ph, SUPPORTS_AXIS(attrh) ? SENSOR_AXIS_NUMBER(attrh) : 0, SCMI_MAX_NUM_SENSOR_AXIS); - strscpy(s->name, sdesc->name, SCMI_MAX_STR_SIZE); + strscpy(s->name, sdesc->name, SCMI_SHORT_NAME_MAX_SIZE); /* * If supported overwrite short name with the extended diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c index 97df6d3dd131..5de93f637bd4 100644 --- a/drivers/firmware/arm_scmi/voltage.c +++ b/drivers/firmware/arm_scmi/voltage.c @@ -233,7 +233,7 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, v = vinfo->domains + dom; v->id = dom; attributes = le32_to_cpu(resp_dom->attr); - strlcpy(v->name, resp_dom->name, SCMI_MAX_STR_SIZE); + strscpy(v->name, resp_dom->name, SCMI_SHORT_NAME_MAX_SIZE); /* * If supported overwrite short name with the extended one; diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 1c58646ba381..704111f63993 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -13,8 +13,9 @@ #include #include -#define SCMI_MAX_STR_SIZE 64 -#define SCMI_MAX_NUM_RATES 16 +#define SCMI_MAX_STR_SIZE 64 +#define SCMI_SHORT_NAME_MAX_SIZE 16 +#define SCMI_MAX_NUM_RATES 16 /** * struct scmi_revision_info - version information structure @@ -36,8 +37,8 @@ struct scmi_revision_info { u8 num_protocols; u8 num_agents; u32 impl_ver; - char vendor_id[SCMI_MAX_STR_SIZE]; - char sub_vendor_id[SCMI_MAX_STR_SIZE]; + char vendor_id[SCMI_SHORT_NAME_MAX_SIZE]; + char sub_vendor_id[SCMI_SHORT_NAME_MAX_SIZE]; }; struct scmi_clock_info { -- cgit v1.2.3 From fe0fde09e1cb83effcf8fafa372533f438d93a1a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 2 Jun 2022 10:07:38 +0900 Subject: ksmbd: use SOCK_NONBLOCK type for kernel_accept() I found that normally it is O_NONBLOCK but there are different value for some arch. /include/linux/net.h: #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif /arch/alpha/include/asm/socket.h: #define SOCK_NONBLOCK 0x40000000 Use SOCK_NONBLOCK instead of O_NONBLOCK for kernel_accept(). Suggested-by: David Howells Signed-off-by: Namjae Jeon Reviewed-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/transport_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d3..143bba4e4db8 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) -- cgit v1.2.3 From 06ee1c0aebd5dfdf6bf237165b22415f64f38b7c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 11 Jun 2022 10:32:44 +0200 Subject: ksmbd: smbd: Remove useless license text when SPDX-License-Identifier is already used An SPDX-License-Identifier is already in place. There is no need to duplicate part of the corresponding license. Signed-off-by: Christophe JAILLET Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/transport_rdma.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index d035e060c2f0..35b55ee94fe5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -5,16 +5,6 @@ * * Author(s): Long Li , * Hyunchul Lee - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #define SUBMOD_NAME "smb_direct" -- cgit v1.2.3 From 44dbdf3bb3f44bf08897ed5f22eb262edcf3d926 Mon Sep 17 00:00:00 2001 From: Ludvig Pärsson Date: Fri, 10 Jun 2022 16:00:55 +0200 Subject: firmware: arm_scmi: Fix incorrect error propagation in scmi_voltage_descriptors_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scmi_voltage_descriptors_get() will incorrecly return an error code if the last iteration of the for loop that retrieves the descriptors is skipped due to an error. Skipping an iteration in the loop is not an error, but the `ret` value from the last iteration will be propagated when the function returns. Fix by not saving return values that should not be propagated. This solution also minimizes the risk of future patches accidentally re-introducing this bug. Link: https://lore.kernel.org/r/20220610140055.31491-1-ludvig.parsson@axis.com Reviewed-by: Cristian Marussi Signed-off-by: Ludvig Pärsson [sudeep.holla: Removed unneeded reset_rx_to_maxsz and check for return value from scmi_voltage_levels_get as suggested by Cristian] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/voltage.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c index 5de93f637bd4..eaa8d944926a 100644 --- a/drivers/firmware/arm_scmi/voltage.c +++ b/drivers/firmware/arm_scmi/voltage.c @@ -225,9 +225,8 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, /* Retrieve domain attributes at first ... */ put_unaligned_le32(dom, td->tx.buf); - ret = ph->xops->do_xfer(ph, td); /* Skip domain on comms error */ - if (ret) + if (ph->xops->do_xfer(ph, td)) continue; v = vinfo->domains + dom; @@ -249,12 +248,8 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, v->async_level_set = true; } - ret = scmi_voltage_levels_get(ph, v); /* Skip invalid voltage descriptors */ - if (ret) - continue; - - ph->xops->reset_rx_to_maxsz(ph, td); + scmi_voltage_levels_get(ph, v); } ph->xops->xfer_put(ph, td); -- cgit v1.2.3 From 97a4087a363888b818225d890c912a52a24b9f73 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 13 Jun 2022 13:11:34 +0200 Subject: MAINTAINERS: add include/dt-bindings/gpio to GPIO SUBSYSTEM Maintainers of the directory Documentation/devicetree/bindings/gpio are also the maintainers of the corresponding directory include/dt-bindings/gpio. Add the file entry for include/dt-bindings/gpio to the appropriate section in MAINTAINERS. Signed-off-by: Lukas Bulwahn Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1fc9ead83d2a..ef73aaae2bd2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8479,6 +8479,7 @@ F: Documentation/devicetree/bindings/gpio/ F: Documentation/driver-api/gpio/ F: drivers/gpio/ F: include/asm-generic/gpio.h +F: include/dt-bindings/gpio/ F: include/linux/gpio.h F: include/linux/gpio/ F: include/linux/of_gpio.h -- cgit v1.2.3 From 30756cc1645080445c957192bc8a7af3b193d617 Mon Sep 17 00:00:00 2001 From: Tom Schwindl Date: Sun, 12 Jun 2022 19:01:09 +0000 Subject: docs: driver-api: gpio: Fix filename mismatch The filenames were changed a while ago, but board.rst, consumer.rst and intro.rst still refer to the old names. Fix those references to match the Actual names and avoid possible confusion. Signed-off-by: Tom Schwindl Signed-off-by: Bartosz Golaszewski --- Documentation/driver-api/gpio/board.rst | 2 +- Documentation/driver-api/gpio/consumer.rst | 6 +++--- Documentation/driver-api/gpio/intro.rst | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst index 4e3adf31c8d1..b33aa04f213f 100644 --- a/Documentation/driver-api/gpio/board.rst +++ b/Documentation/driver-api/gpio/board.rst @@ -6,7 +6,7 @@ This document explains how GPIOs can be assigned to given devices and functions. Note that it only applies to the new descriptor-based interface. For a description of the deprecated integer-based GPIO interface please refer to -gpio-legacy.txt (actually, there is no real mapping possible with the old +legacy.rst (actually, there is no real mapping possible with the old interface; you just fetch an integer from somewhere and request the corresponding GPIO). diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index 47869ca8ccf0..72bcf5f5e3a2 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -4,7 +4,7 @@ GPIO Descriptor Consumer Interface This document describes the consumer interface of the GPIO framework. Note that it describes the new descriptor-based interface. For a description of the -deprecated integer-based GPIO interface please refer to gpio-legacy.txt. +deprecated integer-based GPIO interface please refer to legacy.rst. Guidelines for GPIOs consumers @@ -78,7 +78,7 @@ whether the line is configured active high or active low (see The two last flags are used for use cases where open drain is mandatory, such as I2C: if the line is not already configured as open drain in the mappings -(see board.txt), then open drain will be enforced anyway and a warning will be +(see board.rst), then open drain will be enforced anyway and a warning will be printed that the board configuration needs to be updated to match the use case. Both functions return either a valid GPIO descriptor, or an error code checkable @@ -270,7 +270,7 @@ driven. The same is applicable for open drain or open source output lines: those do not actively drive their output high (open drain) or low (open source), they just switch their output to a high impedance value. The consumer should not need to -care. (For details read about open drain in driver.txt.) +care. (For details read about open drain in driver.rst.) With this, all the gpiod_set_(array)_value_xxx() functions interpret the parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line diff --git a/Documentation/driver-api/gpio/intro.rst b/Documentation/driver-api/gpio/intro.rst index 2e924fb5b3d5..c9c19243b97f 100644 --- a/Documentation/driver-api/gpio/intro.rst +++ b/Documentation/driver-api/gpio/intro.rst @@ -14,12 +14,12 @@ Due to the history of GPIO interfaces in the kernel, there are two different ways to obtain and use GPIOs: - The descriptor-based interface is the preferred way to manipulate GPIOs, - and is described by all the files in this directory excepted gpio-legacy.txt. + and is described by all the files in this directory excepted legacy.rst. - The legacy integer-based interface which is considered deprecated (but still - usable for compatibility reasons) is documented in gpio-legacy.txt. + usable for compatibility reasons) is documented in legacy.rst. The remainder of this document applies to the new descriptor-based interface. -gpio-legacy.txt contains the same information applied to the legacy +legacy.rst contains the same information applied to the legacy integer-based interface. -- cgit v1.2.3 From a01a40e334996b05df92d5a9d594cb5937dd3cc0 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sun, 12 Jun 2022 13:23:09 +0200 Subject: gpio: realtek-otto: Make the irqchip immutable Since commit 6c846d026d49 ("gpio: Don't fiddle with irqchips marked as immutable") a warning is issued for the realtek-otto driver: gpio gpiochip0: (18003500.gpio): not an immutable chip, please consider fixing it! Make the driver's irqchip immutable to fix this. Signed-off-by: Sander Vanheule Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-realtek-otto.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-realtek-otto.c b/drivers/gpio/gpio-realtek-otto.c index c52b2cb1acae..63dcf42f7c20 100644 --- a/drivers/gpio/gpio-realtek-otto.c +++ b/drivers/gpio/gpio-realtek-otto.c @@ -172,6 +172,8 @@ static void realtek_gpio_irq_unmask(struct irq_data *data) unsigned long flags; u16 m; + gpiochip_enable_irq(&ctrl->gc, line); + raw_spin_lock_irqsave(&ctrl->lock, flags); m = ctrl->intr_mask[port]; m |= realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK); @@ -195,6 +197,8 @@ static void realtek_gpio_irq_mask(struct irq_data *data) ctrl->intr_mask[port] = m; realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m); raw_spin_unlock_irqrestore(&ctrl->lock, flags); + + gpiochip_disable_irq(&ctrl->gc, line); } static int realtek_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type) @@ -315,13 +319,15 @@ static int realtek_gpio_irq_init(struct gpio_chip *gc) return 0; } -static struct irq_chip realtek_gpio_irq_chip = { +static const struct irq_chip realtek_gpio_irq_chip = { .name = "realtek-otto-gpio", .irq_ack = realtek_gpio_irq_ack, .irq_mask = realtek_gpio_irq_mask, .irq_unmask = realtek_gpio_irq_unmask, .irq_set_type = realtek_gpio_irq_set_type, .irq_set_affinity = realtek_gpio_irq_set_affinity, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; static const struct of_device_id realtek_gpio_of_match[] = { @@ -404,7 +410,7 @@ static int realtek_gpio_probe(struct platform_device *pdev) irq = platform_get_irq_optional(pdev, 0); if (!(dev_flags & GPIO_INTERRUPTS_DISABLED) && irq > 0) { girq = &ctrl->gc.irq; - girq->chip = &realtek_gpio_irq_chip; + gpio_irq_chip_set_chip(girq, &realtek_gpio_irq_chip); girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->parent_handler = realtek_gpio_irq_handler; -- cgit v1.2.3 From 49e477610087a02c3604061b8f3ee3a25a493987 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 8 Jun 2022 09:13:34 -0700 Subject: drm/msm: Switch ordering of runpm put vs devfreq_idle In msm_devfreq_suspend() we cancel idle_work synchronously so that it doesn't run after we power of the hw or in the resume path. But this means that we want to ensure that idle_work is not scheduled *after* we no longer hold a runpm ref. So switch the ordering of pm_runtime_put() vs msm_devfreq_idle(). v2. Only move the runpm _put_autosuspend, and not the _mark_last_busy() Fixes: 9bc95570175a ("drm/msm: Devfreq tuning") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20210927152928.831245-1-robdclark@gmail.com Reviewed-by: Akhil P Oommen Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20220608161334.2140611-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index eb8a6663f309..244511f85044 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -672,7 +672,6 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_submit_retire(submit); pm_runtime_mark_last_busy(&gpu->pdev->dev); - pm_runtime_put_autosuspend(&gpu->pdev->dev); spin_lock_irqsave(&ring->submit_lock, flags); list_del(&submit->node); @@ -686,6 +685,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_devfreq_idle(gpu); mutex_unlock(&gpu->active_lock); + pm_runtime_put_autosuspend(&gpu->pdev->dev); + msm_gem_submit_put(submit); } -- cgit v1.2.3 From 93a8ba2a619816d631bd69e9ce2172b4d7a481b8 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 11 May 2022 18:08:23 +0200 Subject: ARM: dts: imx6qdl: correct PU regulator ramp delay Contrary to what was believed at the time, the ramp delay of 150us is not plenty for the PU LDO with the default step time of 512 pulses of the 24MHz clock. Measurements have shown that after enabling the LDO the voltage on VDDPU_CAP jumps to ~750mV in the first step and after that the regulator executes the normal ramp up as defined by the step size control. This means it takes the regulator between 360us and 370us to ramp up to the nominal 1.15V voltage for this power domain. With the old setting of the ramp delay the power up of the PU GPC domain would happen in the middle of the regulator ramp with the voltage being at around 900mV. Apparently this was enough for most units to properly power up the peripherals in the domain and execute the reset. Some units however, fail to power up properly, especially when the chip is at a low temperature. In that case any access to the GPU registers would yield an incorrect result with no way to recover from this situation. Change the ramp delay to 380us to cover the measured ramp up time with a bit of additional slack. Fixes: 40130d327f72 ("ARM: dts: imx6qdl: Allow disabling the PU regulator, add a enable ramp delay") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index d27beb47f9a3..652feff33496 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -762,7 +762,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; -- cgit v1.2.3 From b426310e509a1fde077fbe684ecc4a4a694d2bab Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Fri, 13 May 2022 12:26:12 +0200 Subject: ARM: dts: imx6qdl-colibri: Fix capacitive touch reset polarity The commit feedaacdadfc ("Input: atmel_mxt_ts - fix up inverted RESET handler") requires the reset GPIO to have GPIO_ACTIVE_LOW. Fixes: 1524b27c94a6 ("ARM: dts: imx6dl-colibri: Move common nodes to SoM dtsi") Reviewed-by: Fabio Estevam Signed-off-by: Max Krummenacher Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-colibri.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi index c383e0e4110c..7df270cea292 100644 --- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi @@ -593,7 +593,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_atmel_conn>; reg = <0x4a>; - reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */ + reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */ status = "disabled"; }; }; -- cgit v1.2.3 From 7c7eaeefb0ae226da9233d5db265652d900e1fcb Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 24 May 2022 09:39:34 +0200 Subject: soc: imx: imx8m-blk-ctrl: fix display clock for LCDIF2 power domain LCDIF2 has its own display clock, use this one. Fixes: 07614fed00e9 ("soc: imx: imx8m-blk-ctrl: Add i.MX8MP media blk-ctrl") Signed-off-by: Alexander Stein Reviewed-by: Paul Elder Tested-by: Martyn Welch Signed-off-by: Shawn Guo --- drivers/soc/imx/imx8m-blk-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c index 7f49385ed2f8..7ebc28709e94 100644 --- a/drivers/soc/imx/imx8m-blk-ctrl.c +++ b/drivers/soc/imx/imx8m-blk-ctrl.c @@ -667,7 +667,7 @@ static const struct imx8m_blk_ctrl_domain_data imx8mp_media_blk_ctl_domain_data[ }, [IMX8MP_MEDIABLK_PD_LCDIF_2] = { .name = "mediablk-lcdif-2", - .clk_names = (const char *[]){ "disp1", "apb", "axi", }, + .clk_names = (const char *[]){ "disp2", "apb", "axi", }, .num_clks = 3, .gpc_name = "lcdif2", .rst_mask = BIT(11) | BIT(12) | BIT(24), -- cgit v1.2.3 From 89931cb463d861faf987dbbff9db986fe59293f7 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Mon, 13 Jun 2022 09:19:20 +0200 Subject: ARM: dts: stm32: move SCMI related nodes in a dedicated file for stm32mp15 Adding a "secure" version of STM32 boards (DK1/DK2/ED1/EV1), SCMI (clock/ reset) protocol and OP-TEE node have been added in SoC dtsi file (stm32mp151.dtsi). They have been added with a status disabled in order to keep our legacy unchanged. It is actually not enough to keep our legacy unchanged. First, just a reminder about our use case: TF-A (BL2) loads and starts OP-TEE, then loads and runs U-Boot. U-Boot code checks if an OP-TEE is running, if yes it searches in Kernel device tree if an OP-TEE node is present: -If the OP-TEE node is not present then U-Boot copies OP-TEE node and its reserved memory region from U-Boot device tree to the kernel device tree. -If the OP-TEE node is present then it does nothing (this OP-TEE node will be used by Linux). So U-Boot lets the kernel device tree unchanged thinking it is correct for an OP-TEE usage. It is the case for our legacy boards, the OP-TEE node is present (although disabled) but the reserved memory region is not declared. As no memory region has been reserved for OP-TEE, the end of DDR is seen by the kernel as free and then used for CMA. But as OP-TEE is running, this end of DDR is already used by OP-TEE. So as soon as kernel tries to access to the CMA region OP-TEE raises an error. To fix it, all OP-TEE node and SCMI is moved in a dedicated file. Fixes: 40b4157dbd8c ("ARM: dts: stm32: enable optee firmware and SCMI support on STM32MP15") Signed-off-by: Alexandre Torgue Link: https://lore.kernel.org/r/20220613071920.5463-1-alexandre.torgue@foss.st.com' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/stm32mp15-scmi.dtsi | 47 ++++++++++++++++++++++++++++++ arch/arm/boot/dts/stm32mp151.dtsi | 41 -------------------------- arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts | 13 +-------- arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts | 13 +-------- arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts | 13 +-------- arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts | 13 +-------- 6 files changed, 51 insertions(+), 89 deletions(-) create mode 100644 arch/arm/boot/dts/stm32mp15-scmi.dtsi diff --git a/arch/arm/boot/dts/stm32mp15-scmi.dtsi b/arch/arm/boot/dts/stm32mp15-scmi.dtsi new file mode 100644 index 000000000000..e90cf3acd0b3 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp15-scmi.dtsi @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2022 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +/ { + firmware { + optee: optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; + + scmi: scmi { + compatible = "linaro,scmi-optee"; + #address-cells = <1>; + #size-cells = <0>; + linaro,optee-channel-id = <0>; + shmem = <&scmi_shm>; + + scmi_clk: protocol@14 { + reg = <0x14>; + #clock-cells = <1>; + }; + + scmi_reset: protocol@16 { + reg = <0x16>; + #reset-cells = <1>; + }; + }; + }; + + soc { + scmi_sram: sram@2ffff000 { + compatible = "mmio-sram"; + reg = <0x2ffff000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x2ffff000 0x1000>; + + scmi_shm: scmi-sram@0 { + compatible = "arm,scmi-shmem"; + reg = <0 0x80>; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 1b2fd3426a81..7fdc324b3cf9 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -115,33 +115,6 @@ status = "disabled"; }; - firmware { - optee: optee { - compatible = "linaro,optee-tz"; - method = "smc"; - status = "disabled"; - }; - - scmi: scmi { - compatible = "linaro,scmi-optee"; - #address-cells = <1>; - #size-cells = <0>; - linaro,optee-channel-id = <0>; - shmem = <&scmi_shm>; - status = "disabled"; - - scmi_clk: protocol@14 { - reg = <0x14>; - #clock-cells = <1>; - }; - - scmi_reset: protocol@16 { - reg = <0x16>; - #reset-cells = <1>; - }; - }; - }; - soc { compatible = "simple-bus"; #address-cells = <1>; @@ -149,20 +122,6 @@ interrupt-parent = <&intc>; ranges; - scmi_sram: sram@2ffff000 { - compatible = "mmio-sram"; - reg = <0x2ffff000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x2ffff000 0x1000>; - - scmi_shm: scmi-sram@0 { - compatible = "arm,scmi-shmem"; - reg = <0 0x80>; - status = "disabled"; - }; - }; - timers2: timer@40000000 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts index e3d3f3f30c7d..36371d6ed660 100644 --- a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157a-dk1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board"; @@ -54,10 +55,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -76,11 +73,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts index 45dcd299aa9e..03226a596904 100644 --- a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-dk2.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board"; @@ -63,10 +64,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -85,11 +82,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts index 458e0ca3cded..c1a79272c068 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ed1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter"; @@ -59,10 +60,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -81,11 +78,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts index df9c113edb4b..7842384ddbe4 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ev1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother"; @@ -68,10 +69,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -90,11 +87,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; -- cgit v1.2.3 From 7c7ff68daa93d8c4cdea482da4f2429c0398fcde Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 1 Jun 2022 13:05:48 +0400 Subject: ARM: Fix refcount leak in axxia_boot_secondary of_find_compatible_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: 1d22924e1c4e ("ARM: Add platform support for LSI AXM55xx SoC") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220601090548.47616-1-linmq006@gmail.com' Signed-off-by: Arnd Bergmann --- arch/arm/mach-axxia/platsmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a..2e203626eda5 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; -- cgit v1.2.3 From be5cddef05f519a321a543906f255ac247246074 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 10 Jun 2022 13:40:29 +0300 Subject: bus: bt1-apb: Don't print error on -EPROBE_DEFER The Baikal-T1 APB bus driver correctly handles the deferred probe situation, but still pollutes the system log with a misleading error message. Let's fix that by using the dev_err_probe() method to print the log message in case of the clocks/resets request errors. Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20220610104030.28399-1-Sergey.Semin@baikalelectronics.ru' Signed-off-by: Arnd Bergmann --- drivers/bus/bt1-apb.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/bus/bt1-apb.c b/drivers/bus/bt1-apb.c index b25ff941e7c7..63b1b4a76671 100644 --- a/drivers/bus/bt1-apb.c +++ b/drivers/bus/bt1-apb.c @@ -175,10 +175,9 @@ static int bt1_apb_request_rst(struct bt1_apb *apb) int ret; apb->prst = devm_reset_control_get_optional_exclusive(apb->dev, "prst"); - if (IS_ERR(apb->prst)) { - dev_warn(apb->dev, "Couldn't get reset control line\n"); - return PTR_ERR(apb->prst); - } + if (IS_ERR(apb->prst)) + return dev_err_probe(apb->dev, PTR_ERR(apb->prst), + "Couldn't get reset control line\n"); ret = reset_control_deassert(apb->prst); if (ret) @@ -199,10 +198,9 @@ static int bt1_apb_request_clk(struct bt1_apb *apb) int ret; apb->pclk = devm_clk_get(apb->dev, "pclk"); - if (IS_ERR(apb->pclk)) { - dev_err(apb->dev, "Couldn't get APB clock descriptor\n"); - return PTR_ERR(apb->pclk); - } + if (IS_ERR(apb->pclk)) + return dev_err_probe(apb->dev, PTR_ERR(apb->pclk), + "Couldn't get APB clock descriptor\n"); ret = clk_prepare_enable(apb->pclk); if (ret) { -- cgit v1.2.3 From 5e93207e962a6d23893ff4405f6c5d4396fb5934 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 10 Jun 2022 13:40:30 +0300 Subject: bus: bt1-axi: Don't print error on -EPROBE_DEFER The Baikal-T1 AXI bus driver correctly handles the deferred probe situation, but still pollutes the system log with a misleading error message. Let's fix that by using the dev_err_probe() method to print the log message in case of the clocks/resets request errors. Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20220610104030.28399-2-Sergey.Semin@baikalelectronics.ru' Signed-off-by: Arnd Bergmann --- drivers/bus/bt1-axi.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/bus/bt1-axi.c b/drivers/bus/bt1-axi.c index e7a6744acc7b..70e49a6e5374 100644 --- a/drivers/bus/bt1-axi.c +++ b/drivers/bus/bt1-axi.c @@ -135,10 +135,9 @@ static int bt1_axi_request_rst(struct bt1_axi *axi) int ret; axi->arst = devm_reset_control_get_optional_exclusive(axi->dev, "arst"); - if (IS_ERR(axi->arst)) { - dev_warn(axi->dev, "Couldn't get reset control line\n"); - return PTR_ERR(axi->arst); - } + if (IS_ERR(axi->arst)) + return dev_err_probe(axi->dev, PTR_ERR(axi->arst), + "Couldn't get reset control line\n"); ret = reset_control_deassert(axi->arst); if (ret) @@ -159,10 +158,9 @@ static int bt1_axi_request_clk(struct bt1_axi *axi) int ret; axi->aclk = devm_clk_get(axi->dev, "aclk"); - if (IS_ERR(axi->aclk)) { - dev_err(axi->dev, "Couldn't get AXI Interconnect clock\n"); - return PTR_ERR(axi->aclk); - } + if (IS_ERR(axi->aclk)) + return dev_err_probe(axi->dev, PTR_ERR(axi->aclk), + "Couldn't get AXI Interconnect clock\n"); ret = clk_prepare_enable(axi->aclk); if (ret) { -- cgit v1.2.3 From 62b5e322fb6cc5a5a91fdeba0e4e57e75d9f4387 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 13 Jun 2022 18:10:19 -0400 Subject: drm/msm: use for_each_sgtable_sg to iterate over scatterlist The dma_map_sgtable() call (used to invalidate cache) overwrites sgt->nents with 1, so msm_iommu_pagetable_map maps only the first physical segment. To fix this problem use for_each_sgtable_sg(), which uses orig_nents. Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20220613221019.11399-1-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index bcaddbba564d..a54ed354578b 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -58,7 +58,7 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, u64 addr = iova; unsigned int i; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { + for_each_sgtable_sg(sgt, sg, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); -- cgit v1.2.3 From 933b5f9f98da29af646b51b36a0753692908ef64 Mon Sep 17 00:00:00 2001 From: Dmitry Klochkov Date: Tue, 14 Jun 2022 15:11:41 +0300 Subject: tools/kvm_stat: fix display of error when multiple processes are found Instead of printing an error message, kvm_stat script fails when we restrict statistics to a guest by its name and there are multiple guests with such name: # kvm_stat -g my_vm Traceback (most recent call last): File "/usr/bin/kvm_stat", line 1819, in main() File "/usr/bin/kvm_stat", line 1779, in main options = get_options() File "/usr/bin/kvm_stat", line 1718, in get_options options = argparser.parse_args() File "/usr/lib64/python3.10/argparse.py", line 1825, in parse_args args, argv = self.parse_known_args(args, namespace) File "/usr/lib64/python3.10/argparse.py", line 1858, in parse_known_args namespace, args = self._parse_known_args(args, namespace) File "/usr/lib64/python3.10/argparse.py", line 2067, in _parse_known_args start_index = consume_optional(start_index) File "/usr/lib64/python3.10/argparse.py", line 2007, in consume_optional take_action(action, args, option_string) File "/usr/lib64/python3.10/argparse.py", line 1935, in take_action action(self, namespace, argument_values, option_string) File "/usr/bin/kvm_stat", line 1649, in __call__ ' to specify the desired pid'.format(" ".join(pids))) TypeError: sequence item 0: expected str instance, int found To avoid this, it's needed to convert pids int values to strings before pass them to join(). Signed-off-by: Dmitry Klochkov Message-Id: <20220614121141.160689-1-kdmitry556@gmail.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 5a5bd74f55bd..9c366b3a676d 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately. .format(values)) if len(pids) > 1: sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' - ' to specify the desired pid'.format(" ".join(pids))) + ' to specify the desired pid' + .format(" ".join(map(str, pids)))) namespace.pid = pids[0] argparser = argparse.ArgumentParser(description=description_text, -- cgit v1.2.3 From 311e03c29c255665e10a31910308de3777f21274 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 27 May 2022 10:23:40 -0700 Subject: drm/msm/gem: Separate object and vma unpin Previously the BO_PINNED state in the submit was tracking two related but different things: (1) that the buffer object was pinned, and (2) that the vma (mapping within a set of pagetables) was pinned. But with fenced vma unpin (needed so that userspace couldn't race with retire path for releasing a vma) these two were decoupled. The fact that the BO_PINNED flag was already cleared meant that we leaked the bo pin count which should have been dropped when the submit was retired. So split this state into BO_OBJ_PINNED and BO_VMA_PINNED, so they can be dropped independently. Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/487559/ Link: https://lore.kernel.org/r/20220527172341.2151005-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_gem.c | 7 +++---- drivers/gpu/drm/msm/msm_gem.h | 11 ++++++----- drivers/gpu/drm/msm/msm_gem_submit.c | 18 ++++++++++++------ drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 52fe6428a341..916e7f418fe1 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -439,14 +439,12 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) return ret; } -void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma) +void msm_gem_unpin_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); GEM_WARN_ON(!msm_gem_is_locked(obj)); - msm_gem_unpin_vma(vma); - msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); @@ -586,7 +584,8 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj, msm_gem_lock(obj); vma = lookup_vma(obj, aspace); if (!GEM_WARN_ON(!vma)) { - msm_gem_unpin_vma_locked(obj, vma); + msm_gem_unpin_vma(vma); + msm_gem_unpin_locked(obj); } msm_gem_unlock(obj); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index c75d3b879a53..6b7d5bb3b575 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -145,7 +145,7 @@ struct msm_gem_object { uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma); -void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma); +void msm_gem_unpin_locked(struct drm_gem_object *obj); struct msm_gem_vma *msm_gem_get_vma_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); int msm_gem_get_iova(struct drm_gem_object *obj, @@ -377,10 +377,11 @@ struct msm_gem_submit { } *cmd; /* array of size nr_cmds */ struct { /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ -#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ -#define BO_LOCKED 0x4000 /* obj lock is held */ -#define BO_ACTIVE 0x2000 /* active refcnt is held */ -#define BO_PINNED 0x1000 /* obj is pinned and on active list */ +#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ +#define BO_LOCKED 0x4000 /* obj lock is held */ +#define BO_ACTIVE 0x2000 /* active refcnt is held */ +#define BO_OBJ_PINNED 0x1000 /* obj (pages) is pinned and on active list */ +#define BO_VMA_PINNED 0x0800 /* vma (virtual address) is pinned */ uint32_t flags; union { struct msm_gem_object *obj; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 9cd8c8708990..286124008445 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -232,8 +232,11 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, */ submit->bos[i].flags &= ~cleanup_flags; - if (flags & BO_PINNED) - msm_gem_unpin_vma_locked(obj, submit->bos[i].vma); + if (flags & BO_VMA_PINNED) + msm_gem_unpin_vma(submit->bos[i].vma); + + if (flags & BO_OBJ_PINNED) + msm_gem_unpin_locked(obj); if (flags & BO_ACTIVE) msm_gem_active_put(obj); @@ -244,7 +247,9 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) { - submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE | BO_LOCKED); + unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED | + BO_ACTIVE | BO_LOCKED; + submit_cleanup_bo(submit, i, cleanup_flags); if (!(submit->bos[i].flags & BO_VALID)) submit->bos[i].iova = 0; @@ -377,7 +382,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - submit->bos[i].flags |= BO_PINNED; + submit->bos[i].flags |= BO_OBJ_PINNED | BO_VMA_PINNED; submit->bos[i].vma = vma; if (vma->iova == submit->bos[i].iova) { @@ -511,7 +516,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error) unsigned i; if (error) - cleanup_flags |= BO_PINNED | BO_ACTIVE; + cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED | BO_ACTIVE; for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -529,7 +534,8 @@ void msm_submit_retire(struct msm_gem_submit *submit) struct drm_gem_object *obj = &submit->bos[i].obj->base; msm_gem_lock(obj); - submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE); + /* Note, VMA already fence-unpinned before submit: */ + submit_cleanup_bo(submit, i, BO_OBJ_PINNED | BO_ACTIVE); msm_gem_unlock(obj); drm_gem_object_put(obj); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 43066320ff8c..56eecb4a72dc 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -25,7 +25,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_gem_lock(obj); msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx); - submit->bos[i].flags &= ~BO_PINNED; + submit->bos[i].flags &= ~BO_VMA_PINNED; msm_gem_unlock(obj); } -- cgit v1.2.3 From b4d329c451a299323b26039df97b32896fb46abc Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 10 Jun 2022 10:20:55 -0700 Subject: drm/msm/gem: Drop early returns in close/purge vma Keep the warn, but drop the early return. If we do manage to hit this sort of issue, skipping the cleanup just makes things worse (dangling drm_mm_nodes when the msm_gem_vma is freed, etc). Whereas the worst that happens if we tear down a mapping the GPU is accessing is that we get GPU iova faults, but otherwise the world keeps spinning. Signed-off-by: Rob Clark Tested-by: Steev Klimaszewski Reported-by: Steev Klimaszewski Patchwork: https://patchwork.freedesktop.org/patch/489115/ Link: https://lore.kernel.org/r/20220610172055.2337977-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_gem_vma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 3c1dc9241831..c471aebcdbab 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -62,8 +62,7 @@ void msm_gem_purge_vma(struct msm_gem_address_space *aspace, unsigned size = vma->node.size; /* Print a message if we try to purge a vma in use */ - if (GEM_WARN_ON(msm_gem_vma_inuse(vma))) - return; + GEM_WARN_ON(msm_gem_vma_inuse(vma)); /* Don't do anything if the memory isn't mapped */ if (!vma->mapped) @@ -128,8 +127,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, void msm_gem_close_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma) { - if (GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped)) - return; + GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped); spin_lock(&aspace->lock); if (vma->iova) -- cgit v1.2.3 From 920169041baa0a7497ed702aa97d6a2d6285efd3 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 14 Jun 2022 02:31:00 -0500 Subject: drm/sun4i: dw-hdmi: Fix ddc-en GPIO consumer conflict commit 6de79dd3a920 ("drm/bridge: display-connector: add ddc-en gpio support") added a consumer for this GPIO in the HDMI connector device. This new consumer conflicts with the pre-existing GPIO consumer in the sun8i HDMI controller driver, which prevents the driver from probing: [ 4.983358] display-connector connector: GPIO lookup for consumer ddc-en [ 4.983364] display-connector connector: using device tree for GPIO lookup [ 4.983392] gpio-226 (ddc-en): gpiod_request: status -16 [ 4.983399] sun8i-dw-hdmi 6000000.hdmi: Couldn't get ddc-en gpio [ 4.983618] sun4i-drm display-engine: failed to bind 6000000.hdmi (ops sun8i_dw_hdmi_ops [sun8i_drm_hdmi]): -16 [ 4.984082] sun4i-drm display-engine: Couldn't bind all pipelines components [ 4.984171] sun4i-drm display-engine: adev bind failed: -16 [ 4.984179] sun8i-dw-hdmi: probe of 6000000.hdmi failed with error -16 Both drivers have the same behavior: they leave the GPIO active for the life of the device. Let's take advantage of the new implementation, and drop the now-obsolete code from the HDMI controller driver. Fixes: 6de79dd3a920 ("drm/bridge: display-connector: add ddc-en gpio support") Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220614073100.11550-1-samuel@sholland.org --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 54 +++-------------------------------- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h | 2 -- 2 files changed, 4 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index a8d75fd7e9f4..477cb6985b4d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -93,34 +93,10 @@ crtcs_exit: return crtcs; } -static int sun8i_dw_hdmi_find_connector_pdev(struct device *dev, - struct platform_device **pdev_out) -{ - struct platform_device *pdev; - struct device_node *remote; - - remote = of_graph_get_remote_node(dev->of_node, 1, -1); - if (!remote) - return -ENODEV; - - if (!of_device_is_compatible(remote, "hdmi-connector")) { - of_node_put(remote); - return -ENODEV; - } - - pdev = of_find_device_by_node(remote); - of_node_put(remote); - if (!pdev) - return -ENODEV; - - *pdev_out = pdev; - return 0; -} - static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, void *data) { - struct platform_device *pdev = to_platform_device(dev), *connector_pdev; + struct platform_device *pdev = to_platform_device(dev); struct dw_hdmi_plat_data *plat_data; struct drm_device *drm = data; struct device_node *phy_node; @@ -167,30 +143,16 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, return dev_err_probe(dev, PTR_ERR(hdmi->regulator), "Couldn't get regulator\n"); - ret = sun8i_dw_hdmi_find_connector_pdev(dev, &connector_pdev); - if (!ret) { - hdmi->ddc_en = gpiod_get_optional(&connector_pdev->dev, - "ddc-en", GPIOD_OUT_HIGH); - platform_device_put(connector_pdev); - - if (IS_ERR(hdmi->ddc_en)) { - dev_err(dev, "Couldn't get ddc-en gpio\n"); - return PTR_ERR(hdmi->ddc_en); - } - } - ret = regulator_enable(hdmi->regulator); if (ret) { dev_err(dev, "Failed to enable regulator\n"); - goto err_unref_ddc_en; + return ret; } - gpiod_set_value(hdmi->ddc_en, 1); - ret = reset_control_deassert(hdmi->rst_ctrl); if (ret) { dev_err(dev, "Could not deassert ctrl reset control\n"); - goto err_disable_ddc_en; + goto err_disable_regulator; } ret = clk_prepare_enable(hdmi->clk_tmds); @@ -245,12 +207,8 @@ err_disable_clk_tmds: clk_disable_unprepare(hdmi->clk_tmds); err_assert_ctrl_reset: reset_control_assert(hdmi->rst_ctrl); -err_disable_ddc_en: - gpiod_set_value(hdmi->ddc_en, 0); +err_disable_regulator: regulator_disable(hdmi->regulator); -err_unref_ddc_en: - if (hdmi->ddc_en) - gpiod_put(hdmi->ddc_en); return ret; } @@ -264,11 +222,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); reset_control_assert(hdmi->rst_ctrl); - gpiod_set_value(hdmi->ddc_en, 0); regulator_disable(hdmi->regulator); - - if (hdmi->ddc_en) - gpiod_put(hdmi->ddc_en); } static const struct component_ops sun8i_dw_hdmi_ops = { diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index bffe1b9cd3dc..9ad09522947a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -193,7 +192,6 @@ struct sun8i_dw_hdmi { struct regulator *regulator; const struct sun8i_dw_hdmi_quirks *quirks; struct reset_control *rst_ctrl; - struct gpio_desc *ddc_en; }; extern struct platform_driver sun8i_hdmi_phy_driver; -- cgit v1.2.3 From 1342b5b23da9559a1578978eaff7f797d8a87d91 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Jun 2022 00:42:53 -0500 Subject: drm/sun4i: Fix crash during suspend after component bind failure If the component driver fails to bind, or is unbound, the driver data for the top-level platform device points to a freed drm_device. If the system is then suspended, the driver passes this dangling pointer to drm_mode_config_helper_suspend(), which crashes. Fix this by only setting the driver data while the platform driver holds a reference to the drm_device. Fixes: 624b4b48d9d8 ("drm: sun4i: Add support for suspending the display driver") Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220615054254.16352-1-samuel@sholland.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 275f7e4a03ae..8841dba989ee 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -73,7 +73,6 @@ static int sun4i_drv_bind(struct device *dev) goto free_drm; } - dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); @@ -114,6 +113,8 @@ static int sun4i_drv_bind(struct device *dev) drm_fbdev_generic_setup(drm, 32); + dev_set_drvdata(dev, drm); + return 0; finish_poll: @@ -130,6 +131,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + dev_set_drvdata(dev, NULL); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); -- cgit v1.2.3 From cb468c7d84d174ab9cd638be9f5b3f1ba2b311a0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:36 +0200 Subject: drm/vc4: plane: Prevent async update if we don't have a dlist The vc4 planes are setup in hardware by creating a hardware descriptor in a dedicated RAM. As part of the process to setup a plane in KMS, we thus need to allocate some part of that dedicated RAM to store our descriptor there. The async update path will just reuse the descriptor already allocated for that plane and will modify it directly in RAM to match whatever has been asked for. In order to do that, it will compare the descriptor for the old plane state and the new plane state, will make sure they fit in the same size, and check that only the position or buffer address have changed. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_plane.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index b3438f4a81ce..811a2d004cc4 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -1321,6 +1321,10 @@ static int vc4_plane_atomic_async_check(struct drm_plane *plane, old_vc4_state = to_vc4_plane_state(plane->state); new_vc4_state = to_vc4_plane_state(new_plane_state); + + if (!new_vc4_state->hw_dlist) + return -EINVAL; + if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || -- cgit v1.2.3 From 1cbc91eb7b518cc5f80442ff9b517dc5b9d3b849 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:37 +0200 Subject: drm/vc4: Consolidate Hardware Revision Check A new generation of controller has been introduced with the BCM2711/RaspberryPi4. This generation needs a bunch of quirks, and over time we've piled on a number of checks in most parts of the drivers. All these checks are performed several times, and are not always consistent. Let's create a single, global, variable to hold it and use it everywhere. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-3-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 6 +++--- drivers/gpu/drm/vc4/vc4_drv.c | 4 ++++ drivers/gpu/drm/vc4/vc4_drv.h | 6 +++--- drivers/gpu/drm/vc4/vc4_hvs.c | 18 +++++++++--------- drivers/gpu/drm/vc4/vc4_kms.c | 12 +++++------- drivers/gpu/drm/vc4/vc4_plane.c | 13 ++++++------- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 59b20c8f132b..dd5fb25d0f43 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -256,7 +256,7 @@ static u32 vc4_get_fifo_full_level(struct vc4_crtc *vc4_crtc, u32 format) * Removing 1 from the FIFO full level however * seems to completely remove that issue. */ - if (!vc4->hvs->hvs5) + if (!vc4->is_vc5) return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX - 1; return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX; @@ -389,7 +389,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode if (is_dsi) CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep); - if (vc4->hvs->hvs5) + if (vc4->is_vc5) CRTC_WRITE(PV_MUX_CFG, VC4_SET_FIELD(PV_MUX_CFG_RGB_PIXEL_MUX_MODE_NO_SWAP, PV_MUX_CFG_RGB_PIXEL_MUX_MODE)); @@ -1149,7 +1149,7 @@ int vc4_crtc_init(struct drm_device *drm, struct vc4_crtc *vc4_crtc, crtc_funcs, NULL); drm_crtc_helper_add(crtc, crtc_helper_funcs); - if (!vc4->hvs->hvs5) { + if (!vc4->is_vc5) { drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 162bc18e7497..53067525b586 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -217,10 +217,13 @@ static int vc4_drm_bind(struct device *dev) struct vc4_dev *vc4; struct device_node *node; struct drm_crtc *crtc; + bool is_vc5; int ret = 0; dev->coherent_dma_mask = DMA_BIT_MASK(32); + is_vc5 = of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5"); + /* If VC4 V3D is missing, don't advertise render nodes. */ node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL); if (!node || !of_device_is_available(node)) @@ -230,6 +233,7 @@ static int vc4_drm_bind(struct device *dev) vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base); if (IS_ERR(vc4)) return PTR_ERR(vc4); + vc4->is_vc5 = is_vc5; drm = &vc4->base; platform_set_drvdata(pdev, drm); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 15e0c2ac3940..82453a3bcffe 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -74,6 +74,8 @@ struct vc4_perfmon { struct vc4_dev { struct drm_device base; + bool is_vc5; + unsigned int irq; struct vc4_hvs *hvs; @@ -316,6 +318,7 @@ struct vc4_v3d { }; struct vc4_hvs { + struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; u32 __iomem *dlist; @@ -333,9 +336,6 @@ struct vc4_hvs { struct drm_mm_node mitchell_netravali_filter; struct debugfs_regset32 regset; - - /* HVS version 5 flag, therefore requires updated dlist structures */ - bool hvs5; }; struct vc4_plane { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 2a58fc421cf6..ba2c8e5a9b64 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -220,10 +220,11 @@ u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo) int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) { + struct vc4_dev *vc4 = hvs->vc4; u32 reg; int ret; - if (!hvs->hvs5) + if (!vc4->is_vc5) return output; switch (output) { @@ -273,6 +274,7 @@ int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, struct drm_display_mode *mode, bool oneshot) { + struct vc4_dev *vc4 = hvs->vc4; struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state); unsigned int chan = vc4_crtc_state->assigned_channel; @@ -291,7 +293,7 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, */ dispctrl = SCALER_DISPCTRLX_ENABLE; - if (!hvs->hvs5) + if (!vc4->is_vc5) dispctrl |= VC4_SET_FIELD(mode->hdisplay, SCALER_DISPCTRLX_WIDTH) | VC4_SET_FIELD(mode->vdisplay, @@ -312,7 +314,7 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx | SCALER_DISPBKGND_AUTOHS | - ((!hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) | + ((!vc4->is_vc5) ? SCALER_DISPBKGND_GAMMA : 0) | (interlace ? SCALER_DISPBKGND_INTERLACE : 0)); /* Reload the LUT, since the SRAMs would have been disabled if @@ -617,11 +619,9 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) if (!hvs) return -ENOMEM; + hvs->vc4 = vc4; hvs->pdev = pdev; - if (of_device_is_compatible(pdev->dev.of_node, "brcm,bcm2711-hvs")) - hvs->hvs5 = true; - hvs->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(hvs->regs)) return PTR_ERR(hvs->regs); @@ -630,7 +630,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) hvs->regset.regs = hvs_regs; hvs->regset.nregs = ARRAY_SIZE(hvs_regs); - if (hvs->hvs5) { + if (vc4->is_vc5) { hvs->core_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(hvs->core_clk)) { dev_err(&pdev->dev, "Couldn't get core clock\n"); @@ -644,7 +644,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) } } - if (!hvs->hvs5) + if (!vc4->is_vc5) hvs->dlist = hvs->regs + SCALER_DLIST_START; else hvs->dlist = hvs->regs + SCALER5_DLIST_START; @@ -665,7 +665,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) * between planes when they don't overlap on the screen, but * for now we just allocate globally. */ - if (!hvs->hvs5) + if (!vc4->is_vc5) /* 48k words of 2x12-bit pixels */ drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); else diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index c169bd72e53b..3c232d85ab85 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -393,7 +393,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) old_hvs_state->fifo_state[channel].pending_commit = NULL; } - if (vc4->hvs->hvs5) { + if (vc4->is_vc5) { unsigned long state_rate = max(old_hvs_state->core_clock_rate, new_hvs_state->core_clock_rate); unsigned long core_rate = max_t(unsigned long, @@ -412,7 +412,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) vc4_ctm_commit(vc4, state); - if (vc4->hvs->hvs5) + if (vc4->is_vc5) vc5_hvs_pv_muxing_commit(vc4, state); else vc4_hvs_pv_muxing_commit(vc4, state); @@ -430,7 +430,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_cleanup_planes(dev, state); - if (vc4->hvs->hvs5) { + if (vc4->is_vc5) { drm_dbg(dev, "Running the core clock at %lu Hz\n", new_hvs_state->core_clock_rate); @@ -1000,8 +1000,6 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - bool is_vc5 = of_device_is_compatible(dev->dev->of_node, - "brcm,bcm2711-vc5"); int ret; /* @@ -1009,7 +1007,7 @@ int vc4_kms_load(struct drm_device *dev) * the BCM2711, but the load tracker computations are used for * the core clock rate calculation. */ - if (!is_vc5) { + if (!vc4->is_vc5) { /* Start with the load tracker enabled. Can be * disabled through the debugfs load_tracker file. */ @@ -1025,7 +1023,7 @@ int vc4_kms_load(struct drm_device *dev) return ret; } - if (is_vc5) { + if (vc4->is_vc5) { dev->mode_config.max_width = 7680; dev->mode_config.max_height = 7680; } else { diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 811a2d004cc4..ba7359516d75 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -489,10 +489,10 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) } /* Align it to 64 or 128 (hvs5) bytes */ - lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); + lbm = roundup(lbm, vc4->is_vc5 ? 128 : 64); /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ - lbm /= vc4->hvs->hvs5 ? 4 : 2; + lbm /= vc4->is_vc5 ? 4 : 2; return lbm; } @@ -608,7 +608,7 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state) ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, &vc4_state->lbm, lbm_size, - vc4->hvs->hvs5 ? 64 : 32, + vc4->is_vc5 ? 64 : 32, 0, 0); spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); @@ -917,7 +917,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && fb->format->has_alpha; - if (!vc4->hvs->hvs5) { + if (!vc4->is_vc5) { /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | @@ -1457,14 +1457,13 @@ static const struct drm_plane_funcs vc4_plane_funcs = { struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_plane *plane = NULL; struct vc4_plane *vc4_plane; u32 formats[ARRAY_SIZE(hvs_formats)]; int num_formats = 0; int ret = 0; unsigned i; - bool hvs5 = of_device_is_compatible(dev->dev->of_node, - "brcm,bcm2711-vc5"); static const uint64_t modifiers[] = { DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, DRM_FORMAT_MOD_BROADCOM_SAND128, @@ -1480,7 +1479,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, return ERR_PTR(-ENOMEM); for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { - if (!hvs_formats[i].hvs5_only || hvs5) { + if (!hvs_formats[i].hvs5_only || vc4->is_vc5) { formats[num_formats] = hvs_formats[i].drm; num_formats++; } -- cgit v1.2.3 From dd2dfd44edc5ba5b2de3c2e6c1c823a62e8f1e92 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:38 +0200 Subject: drm/vc4: bo: Rename vc4_dumb_create We're going to add a new variant of the dumb BO allocation function, so let's rename vc4_dumb_create() to something a bit more specific. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-4-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_bo.c | 6 +++--- drivers/gpu/drm/vc4/vc4_drv.c | 2 +- drivers/gpu/drm/vc4/vc4_drv.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 49c0f2ac868b..6d505da6b6cf 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -471,9 +471,9 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return bo; } -int vc4_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) +int vc4_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) { int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); struct vc4_bo *bo = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 53067525b586..5f39e40ef238 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -175,7 +175,7 @@ static struct drm_driver vc4_drm_driver = { .gem_create_object = vc4_create_object, - DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_dumb_create), + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_bo_dumb_create), .ioctls = vc4_drm_ioctls, .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls), diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 82453a3bcffe..37c93654480f 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -814,9 +814,9 @@ struct vc4_validated_shader_info { struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, bool from_cache, enum vc4_kernel_bo_type type); -int vc4_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); +int vc4_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, -- cgit v1.2.3 From 3d7637423be8340f40a669beb253aabbf08239ca Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:39 +0200 Subject: drm/vc4: bo: Split out Dumb buffers fixup The vc4_bo_dumb_create() both fixes up the allocation arguments to match the hardware constraints and actually performs the allocation. Since we're going to introduce a new function that uses a different allocator, let's split the arguments fixup to a separate function we will be able to reuse. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-5-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_bo.c | 9 +++------ drivers/gpu/drm/vc4/vc4_drv.c | 13 +++++++++++++ drivers/gpu/drm/vc4/vc4_drv.h | 1 + 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 6d505da6b6cf..3ca16d682fc0 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -475,15 +475,12 @@ int vc4_bo_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) { - int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); struct vc4_bo *bo = NULL; int ret; - if (args->pitch < min_pitch) - args->pitch = min_pitch; - - if (args->size < args->pitch * args->height) - args->size = args->pitch * args->height; + ret = vc4_dumb_fixup_args(args); + if (ret) + return ret; bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB); if (IS_ERR(bo)) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 5f39e40ef238..eb08940028d3 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -63,6 +63,19 @@ void __iomem *vc4_ioremap_regs(struct platform_device *pdev, int index) return map; } +int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args) +{ + int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + + if (args->pitch < min_pitch) + args->pitch = min_pitch; + + if (args->size < args->pitch * args->height) + args->size = args->pitch * args->height; + + return 0; +} + static int vc4_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 37c93654480f..9c324c12c410 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -885,6 +885,7 @@ static inline void vc4_debugfs_add_regset32(struct drm_device *drm, /* vc4_drv.c */ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); +int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args); /* vc4_dpi.c */ extern struct platform_driver vc4_dpi_driver; -- cgit v1.2.3 From 538f111160618ef56743a5302e114530edb7df77 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:40 +0200 Subject: drm/vc4: drv: Register a different driver on BCM2711 Prior to the BCM2711/RaspberryPi4, the GPU was a part of the display components of the SoC. It was thus a part of the vc4 driver. However, with the BCM2711, it got split out and thus the v3d driver was created. The vc4 driver now only handles the display part. We didn't properly split out the code when doing the BCM2711 support though, and most of the code around buffer allocations is still involved, even though it doesn't have the backing hardware anymore. Let's start the split out by creating a new drm_driver that only reports and uses what we support on the BCM2711. The ioctl were properly filtered already, but we were still exposing a .gem_create_object hook, as well as having an .open and .postclose hooks which are only relevant on older generations. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-6-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_drv.c | 51 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index eb08940028d3..528a1e2761f1 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -76,6 +76,19 @@ int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args) return 0; } +static int vc5_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + int ret; + + ret = vc4_dumb_fixup_args(args); + if (ret) + return ret; + + return drm_gem_cma_dumb_create_internal(file_priv, dev, args); +} + static int vc4_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -173,7 +186,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), }; -static struct drm_driver vc4_drm_driver = { +static const struct drm_driver vc4_drm_driver = { .driver_features = (DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM | @@ -202,6 +215,27 @@ static struct drm_driver vc4_drm_driver = { .patchlevel = DRIVER_PATCHLEVEL, }; +static const struct drm_driver vc5_drm_driver = { + .driver_features = (DRIVER_MODESET | + DRIVER_ATOMIC | + DRIVER_GEM), + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = vc4_debugfs_init, +#endif + + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc5_dumb_create), + + .fops = &vc4_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + static void vc4_match_add_drivers(struct device *dev, struct component_match **match, struct platform_driver *const *drivers, @@ -225,6 +259,7 @@ static void vc4_match_add_drivers(struct device *dev, static int vc4_drm_bind(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); + const struct drm_driver *driver; struct rpi_firmware *firmware = NULL; struct drm_device *drm; struct vc4_dev *vc4; @@ -236,14 +271,12 @@ static int vc4_drm_bind(struct device *dev) dev->coherent_dma_mask = DMA_BIT_MASK(32); is_vc5 = of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5"); + if (is_vc5) + driver = &vc5_drm_driver; + else + driver = &vc4_drm_driver; - /* If VC4 V3D is missing, don't advertise render nodes. */ - node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL); - if (!node || !of_device_is_available(node)) - vc4_drm_driver.driver_features &= ~DRIVER_RENDER; - of_node_put(node); - - vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base); + vc4 = devm_drm_dev_alloc(dev, driver, struct vc4_dev, base); if (IS_ERR(vc4)) return PTR_ERR(vc4); vc4->is_vc5 = is_vc5; @@ -275,7 +308,7 @@ static int vc4_drm_bind(struct device *dev) return -EPROBE_DEFER; } - ret = drm_aperture_remove_framebuffers(false, &vc4_drm_driver); + ret = drm_aperture_remove_framebuffers(false, driver); if (ret) return ret; -- cgit v1.2.3 From 39a30ec64510f71d2a9f5059a7fc1283c4108a35 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:41 +0200 Subject: drm/vc4: kms: Register a different drm_mode_config_funcs on BCM2711 On the BCM2711, our current definition of drm_mode_config_funcs uses the custom vc4_fb_create(). However, that function relies on the buffer allocation path that was relying on the GPU, and is no longer relevant. Let's create another drm_mode_config_funcs structure that we will register on the BCM2711. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-7-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 3c232d85ab85..1d3b31fb71ea 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -997,6 +997,12 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { .fb_create = vc4_fb_create, }; +static const struct drm_mode_config_funcs vc5_mode_funcs = { + .atomic_check = vc4_atomic_check, + .atomic_commit = drm_atomic_helper_commit, + .fb_create = drm_gem_fb_create, +}; + int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -1031,7 +1037,7 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.max_height = 2048; } - dev->mode_config.funcs = &vc4_mode_funcs; + dev->mode_config.funcs = vc4->is_vc5 ? &vc5_mode_funcs : &vc4_mode_funcs; dev->mode_config.helper_private = &vc4_mode_config_helpers; dev->mode_config.preferred_depth = 24; dev->mode_config.async_page_flip = true; -- cgit v1.2.3 From 2095848661481e31339b32847acf7759b5635f38 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:42 +0200 Subject: drm/vc4: plane: Register a different drm_plane_helper_funcs on BCM2711 On the BCM2711, our current definition of drm_plane_helper_funcs uses the custom vc4_prepare_fb() and vc4_cleanup_fb(). Those functions rely on the buffer allocation path that was relying on the GPU, and is no longer relevant. Let's create another drm_plane_helper_funcs structure that we will register on the BCM2711. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-8-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_plane.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ba7359516d75..1e866dc00ac3 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -1389,6 +1389,13 @@ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_async_update = vc4_plane_atomic_async_update, }; +static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = { + .atomic_check = vc4_plane_atomic_check, + .atomic_update = vc4_plane_atomic_update, + .atomic_async_check = vc4_plane_atomic_async_check, + .atomic_async_update = vc4_plane_atomic_async_update, +}; + static bool vc4_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -1493,7 +1500,10 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, if (ret) return ERR_PTR(ret); - drm_plane_helper_add(plane, &vc4_plane_helper_funcs); + if (vc4->is_vc5) + drm_plane_helper_add(plane, &vc5_plane_helper_funcs); + else + drm_plane_helper_add(plane, &vc4_plane_helper_funcs); drm_plane_create_alpha_property(plane); drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, -- cgit v1.2.3 From 257add942a477bb99bdf4bacc6190703f796dcff Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:43 +0200 Subject: drm/vc4: drv: Skip BO Backend Initialization on BCM2711 On the BCM2711, we currently call the vc4_bo_cache_init() and vc4_gem_init() functions. These functions initialize the BO and GEM backends. However, this code was initially created to accomodate the requirements of the GPU on the older SoCs, while the BCM2711 has a separate driver for it. So let's just skip these calls when we're on a newer hardware. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-9-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_drv.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 528a1e2761f1..ef4ab0563168 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -285,19 +285,23 @@ static int vc4_drm_bind(struct device *dev) platform_set_drvdata(pdev, drm); INIT_LIST_HEAD(&vc4->debugfs_list); - mutex_init(&vc4->bin_bo_lock); + if (!is_vc5) { + mutex_init(&vc4->bin_bo_lock); - ret = vc4_bo_cache_init(drm); - if (ret) - return ret; + ret = vc4_bo_cache_init(drm); + if (ret) + return ret; + } ret = drmm_mode_config_init(drm); if (ret) return ret; - ret = vc4_gem_init(drm); - if (ret) - return ret; + if (!is_vc5) { + ret = vc4_gem_init(drm); + if (ret) + return ret; + } node = of_find_compatible_node(NULL, NULL, "raspberrypi,bcm2835-firmware"); if (node) { -- cgit v1.2.3 From 2523e9dcc3be91bf9fdc0d1e542557ca00bbef42 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:44 +0200 Subject: drm/vc4: crtc: Use an union to store the page flip callback We'll need to extend the vc4_async_flip_state structure to rely on another callback implementation, so let's move the current one into a union. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-10-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index dd5fb25d0f43..1f247c037ce0 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -775,17 +775,17 @@ struct vc4_async_flip_state { struct drm_framebuffer *old_fb; struct drm_pending_vblank_event *event; - struct vc4_seqno_cb cb; + union { + struct vc4_seqno_cb seqno; + } cb; }; /* Called when the V3D execution for the BO being flipped to is done, so that * we can actually update the plane's address to point to it. */ static void -vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) +vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state) { - struct vc4_async_flip_state *flip_state = - container_of(cb, struct vc4_async_flip_state, cb); struct drm_crtc *crtc = flip_state->crtc; struct drm_device *dev = crtc->dev; struct drm_plane *plane = crtc->primary; @@ -821,6 +821,14 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) kfree(flip_state); } +static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb.seqno); + + vc4_async_page_flip_complete(flip_state); +} + /* Implements async (non-vblank-synced) page flips. * * The page flip ioctl needs to return immediately, so we grab the @@ -881,8 +889,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, */ drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno, - vc4_async_page_flip_complete); + vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno, + vc4_async_page_flip_seqno_complete); /* Driver takes ownership of state on successful async commit. */ return 0; -- cgit v1.2.3 From 4d12c36fb73b5c49fe2f95d06515fd9846010fd2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:45 +0200 Subject: drm/vc4: crtc: Move the BO handling out of common page-flip callback We'll soon introduce another completion callback source that won't need to use the BO reference counting, so let's move it around to create a function we will be able to share between both callbacks. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-11-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 1f247c037ce0..0410db97b9d1 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -802,21 +802,8 @@ vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state) drm_crtc_vblank_put(crtc); drm_framebuffer_put(flip_state->fb); - /* Decrement the BO usecnt in order to keep the inc/dec calls balanced - * when the planes are updated through the async update path. - * FIXME: we should move to generic async-page-flip when it's - * available, so that we can get rid of this hand-made cleanup_fb() - * logic. - */ - if (flip_state->old_fb) { - struct drm_gem_cma_object *cma_bo; - struct vc4_bo *bo; - - cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); - bo = to_vc4_bo(&cma_bo->base); - vc4_bo_dec_usecnt(bo); + if (flip_state->old_fb) drm_framebuffer_put(flip_state->old_fb); - } kfree(flip_state); } @@ -825,8 +812,27 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) { struct vc4_async_flip_state *flip_state = container_of(cb, struct vc4_async_flip_state, cb.seqno); + struct vc4_bo *bo = NULL; + + if (flip_state->old_fb) { + struct drm_gem_cma_object *cma_bo = + drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); + bo = to_vc4_bo(&cma_bo->base); + } vc4_async_page_flip_complete(flip_state); + + /* + * Decrement the BO usecnt in order to keep the inc/dec + * calls balanced when the planes are updated through + * the async update path. + * + * FIXME: we should move to generic async-page-flip when + * it's available, so that we can get rid of this + * hand-made cleanup_fb() logic. + */ + if (bo) + vc4_bo_dec_usecnt(bo); } /* Implements async (non-vblank-synced) page flips. -- cgit v1.2.3 From f6766fb265b18248d2c4bc643eb99e853f293dd6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:46 +0200 Subject: drm/vc4: crtc: Move the BO Handling out of Common Page-Flip Handler The function vc4_async_page_flip() handles asynchronous page-flips in the vc4 driver. However, it mixes some generic code with code that should only be run on older generations that have the GPU handled by the vc4 driver. Let's split the generic part out of vc4_async_page_flip() and into a common function that we be reusable by an handler made for the BCM2711. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-12-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 73 +++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 0410db97b9d1..c00fb964c534 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -835,40 +835,21 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) vc4_bo_dec_usecnt(bo); } -/* Implements async (non-vblank-synced) page flips. - * - * The page flip ioctl needs to return immediately, so we grab the - * modeset semaphore on the pipe, and queue the address update for - * when V3D is done with the BO being flipped to. - */ -static int vc4_async_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t flags) +static int +vc4_async_page_flip_common(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) { struct drm_device *dev = crtc->dev; struct drm_plane *plane = crtc->primary; - int ret = 0; struct vc4_async_flip_state *flip_state; struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); - /* Increment the BO usecnt here, so that we never end up with an - * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the - * plane is later updated through the non-async path. - * FIXME: we should move to generic async-page-flip when it's - * available, so that we can get rid of this hand-made prepare_fb() - * logic. - */ - ret = vc4_bo_inc_usecnt(bo); - if (ret) - return ret; - flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); - if (!flip_state) { - vc4_bo_dec_usecnt(bo); + if (!flip_state) return -ENOMEM; - } drm_framebuffer_get(fb); flip_state->fb = fb; @@ -902,6 +883,48 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, return 0; } +/* Implements async (non-vblank-synced) page flips. + * + * The page flip ioctl needs to return immediately, so we grab the + * modeset semaphore on the pipe, and queue the address update for + * when V3D is done with the BO being flipped to. + */ +static int vc4_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + int ret; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + + /* + * Increment the BO usecnt here, so that we never end up with an + * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the + * plane is later updated through the non-async path. + * + * FIXME: we should move to generic async-page-flip when + * it's available, so that we can get rid of this + * hand-made prepare_fb() logic. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + + ret = vc4_async_page_flip_common(crtc, fb, event, flags); + if (ret) { + vc4_bo_dec_usecnt(bo); + return ret; + } + + return 0; +} + int vc4_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, -- cgit v1.2.3 From d87db1c79d6f9ec5505be2ff4ca8811d6e88a667 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:47 +0200 Subject: drm/vc4: crtc: Don't call into BO Handling on Async Page-Flips on BCM2711 The BCM2711 doesn't have a v3d GPU so we don't want to call into its BO management code. Let's create an asynchronous page-flip handler for the BCM2711 that just calls into the common code. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-13-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index c00fb964c534..a3c04d6cbd20 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -925,16 +925,31 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, return 0; } +static int vc5_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + return vc4_async_page_flip_common(crtc, fb, event, flags); +} + int vc4_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t flags, struct drm_modeset_acquire_ctx *ctx) { - if (flags & DRM_MODE_PAGE_FLIP_ASYNC) - return vc4_async_page_flip(crtc, fb, event, flags); - else + if (flags & DRM_MODE_PAGE_FLIP_ASYNC) { + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + if (vc4->is_vc5) + return vc5_async_page_flip(crtc, fb, event, flags); + else + return vc4_async_page_flip(crtc, fb, event, flags); + } else { return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx); + } } struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) -- cgit v1.2.3 From d19e00ee06a9abf590b178c34cad637a516752f8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:48 +0200 Subject: drm/vc4: crtc: Fix out of order frames during asynchronous page flips When doing an asynchronous page flip (PAGE_FLIP ioctl with the DRM_MODE_PAGE_FLIP_ASYNC flag set), the current code waits for the possible GPU buffer being rendered through a call to vc4_queue_seqno_cb(). On the BCM2835-37, the GPU driver is part of the vc4 driver and that function is defined in vc4_gem.c to wait for the buffer to be rendered, and once it's done, call a callback. However, on the BCM2711 used on the RaspberryPi4, the GPU driver is separate (v3d) and that function won't do anything. This was working because we were going into a path, due to uninitialized variables, that was always scheduling the callback. However, we were never actually waiting for the buffer to be rendered which was resulting in frames being displayed out of order. The generic API to signal those kind of completion in the kernel are the DMA fences, and fortunately the v3d drivers supports them and signal when its job is done. That API also provides an equivalent function that allows to have a callback being executed when the fence is signalled as done. Let's change our driver a bit to rely on the previous function for the older SoCs, and on DMA fences for the BCM2711. Signed-off-by: Maxime Ripard Reviewed-by: Melissa Wen Link: https://lore.kernel.org/r/20220610115149.964394-14-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 50 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index a3c04d6cbd20..9355213dc883 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -776,6 +776,7 @@ struct vc4_async_flip_state { struct drm_pending_vblank_event *event; union { + struct dma_fence_cb fence; struct vc4_seqno_cb seqno; } cb; }; @@ -835,6 +836,50 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) vc4_bo_dec_usecnt(bo); } +static void vc4_async_page_flip_fence_complete(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb.fence); + + vc4_async_page_flip_complete(flip_state); + dma_fence_put(fence); +} + +static int vc4_async_set_fence_cb(struct drm_device *dev, + struct vc4_async_flip_state *flip_state) +{ + struct drm_framebuffer *fb = flip_state->fb; + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct dma_fence *fence; + int ret; + + if (!vc4->is_vc5) { + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno, + vc4_async_page_flip_seqno_complete); + } + + ret = dma_resv_get_singleton(cma_bo->base.resv, DMA_RESV_USAGE_READ, &fence); + if (ret) + return ret; + + /* If there's no fence, complete the page flip immediately */ + if (!fence) { + vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); + return 0; + } + + /* If the fence has already been completed, complete the page flip */ + if (dma_fence_add_callback(fence, &flip_state->cb.fence, + vc4_async_page_flip_fence_complete)) + vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); + + return 0; +} + static int vc4_async_page_flip_common(struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -844,8 +889,6 @@ vc4_async_page_flip_common(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct drm_plane *plane = crtc->primary; struct vc4_async_flip_state *flip_state; - struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); - struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); if (!flip_state) @@ -876,8 +919,7 @@ vc4_async_page_flip_common(struct drm_crtc *crtc, */ drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno, - vc4_async_page_flip_seqno_complete); + vc4_async_set_fence_cb(dev, flip_state); /* Driver takes ownership of state on successful async commit. */ return 0; -- cgit v1.2.3 From 30f8c74ca9b7b3a2db55f6bb1d2e9f8c47a79f94 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 10 Jun 2022 13:51:49 +0200 Subject: drm/vc4: Warn if some v3d code is run on BCM2711 The BCM2711 has a separate driver for the v3d, and thus we can't call into any of the driver entrypoints that rely on the v3d being there. Let's add a bunch of checks and complain loudly if that ever happen. Reviewed-by: Melissa Wen Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220610115149.964394-15-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_bo.c | 49 ++++++++++++++++++++++++++++++ drivers/gpu/drm/vc4/vc4_drv.c | 11 +++++++ drivers/gpu/drm/vc4/vc4_drv.h | 6 ++++ drivers/gpu/drm/vc4/vc4_gem.c | 40 ++++++++++++++++++++++++ drivers/gpu/drm/vc4/vc4_irq.c | 16 ++++++++++ drivers/gpu/drm/vc4/vc4_kms.c | 4 +++ drivers/gpu/drm/vc4/vc4_perfmon.c | 47 +++++++++++++++++++++++++++- drivers/gpu/drm/vc4/vc4_render_cl.c | 4 +++ drivers/gpu/drm/vc4/vc4_v3d.c | 15 +++++++++ drivers/gpu/drm/vc4/vc4_validate.c | 16 ++++++++++ drivers/gpu/drm/vc4/vc4_validate_shaders.c | 4 +++ 11 files changed, 211 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3ca16d682fc0..b8d856312846 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -248,6 +248,9 @@ void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo) { struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->purgeable.lock); list_add_tail(&bo->size_head, &vc4->purgeable.list); vc4->purgeable.num++; @@ -259,6 +262,9 @@ static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo) { struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* list_del_init() is used here because the caller might release * the purgeable lock in order to acquire the madv one and update the * madv status. @@ -387,6 +393,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) return ERR_PTR(-ENOMEM); @@ -413,6 +422,9 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, struct drm_gem_cma_object *cma_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + if (size == 0) return ERR_PTR(-EINVAL); @@ -475,9 +487,13 @@ int vc4_bo_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo = NULL; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + ret = vc4_dumb_fixup_args(args); if (ret) return ret; @@ -598,8 +614,12 @@ static void vc4_bo_cache_time_work(struct work_struct *work) int vc4_bo_inc_usecnt(struct vc4_bo *bo) { + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + /* Fast path: if the BO is already retained by someone, no need to * check the madv status. */ @@ -634,6 +654,11 @@ int vc4_bo_inc_usecnt(struct vc4_bo *bo) void vc4_bo_dec_usecnt(struct vc4_bo *bo) { + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* Fast path: if the BO is still retained by someone, no need to test * the madv value. */ @@ -753,6 +778,9 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct vc4_bo *bo = NULL; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + ret = vc4_grab_bin_bo(vc4, vc4file); if (ret) return ret; @@ -776,9 +804,13 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_mmap_bo *args = data; struct drm_gem_object *gem_obj; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) { DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); @@ -802,6 +834,9 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct vc4_bo *bo = NULL; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->size == 0) return -EINVAL; @@ -872,11 +907,15 @@ fail: int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_set_tiling *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; bool t_format; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->flags != 0) return -EINVAL; @@ -915,10 +954,14 @@ int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_get_tiling *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->flags != 0 || args->modifier != 0) return -EINVAL; @@ -945,6 +988,9 @@ int vc4_bo_cache_init(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); int i; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + /* Create the initial set of BO labels that the kernel will * use. This lets us avoid a bunch of string reallocation in * the kernel's draw and BO allocation paths. @@ -1004,6 +1050,9 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_gem_object *gem_obj; int ret = 0, label; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!args->len) return -EINVAL; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ef4ab0563168..0f0f0263e744 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -99,6 +99,9 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, if (args->pad != 0) return -EINVAL; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) return -ENODEV; @@ -142,11 +145,16 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, static int vc4_open(struct drm_device *dev, struct drm_file *file) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL); if (!vc4file) return -ENOMEM; + vc4file->dev = vc4; vc4_perfmon_open_file(vc4file); file->driver_priv = vc4file; @@ -158,6 +166,9 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file->driver_priv; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (vc4file->bin_bo_used) vc4_v3d_bin_bo_put(vc4); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 9c324c12c410..93fd55b9e99e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -48,6 +48,8 @@ enum vc4_kernel_bo_type { * done. This way, only events related to a specific job will be counted. */ struct vc4_perfmon { + struct vc4_dev *dev; + /* Tracks the number of users of the perfmon, when this counter reaches * zero the perfmon is destroyed. */ @@ -580,6 +582,8 @@ to_vc4_crtc_state(struct drm_crtc_state *crtc_state) #define VC4_REG32(reg) { .name = #reg, .offset = reg } struct vc4_exec_info { + struct vc4_dev *dev; + /* Sequence number for this bin/render job. */ uint64_t seqno; @@ -701,6 +705,8 @@ struct vc4_exec_info { * released when the DRM file is closed should be placed here. */ struct vc4_file { + struct vc4_dev *dev; + struct { struct idr idr; struct mutex lock; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 9eaf304fc20d..fe10d9c3fff8 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -76,6 +76,9 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, u32 i; int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n"); return -ENODEV; @@ -386,6 +389,9 @@ vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, unsigned long timeout_expire; DEFINE_WAIT(wait); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (vc4->finished_seqno >= seqno) return 0; @@ -468,6 +474,9 @@ vc4_submit_next_bin_job(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + again: exec = vc4_first_bin_job(vc4); if (!exec) @@ -513,6 +522,9 @@ vc4_submit_next_render_job(struct drm_device *dev) if (!exec) return; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* A previous RCL may have written to one of our textures, and * our full cache flush at bin time may have occurred before * that RCL completed. Flush the texture cache now, but not @@ -531,6 +543,9 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) struct vc4_dev *vc4 = to_vc4_dev(dev); bool was_empty = list_empty(&vc4->render_job_list); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + list_move_tail(&exec->head, &vc4->render_job_list); if (was_empty) vc4_submit_next_render_job(dev); @@ -997,6 +1012,9 @@ vc4_job_handle_completed(struct vc4_dev *vc4) unsigned long irqflags; struct vc4_seqno_cb *cb, *cb_temp; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + spin_lock_irqsave(&vc4->job_lock, irqflags); while (!list_empty(&vc4->job_done_list)) { struct vc4_exec_info *exec = @@ -1033,6 +1051,9 @@ int vc4_queue_seqno_cb(struct drm_device *dev, struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + cb->func = func; INIT_WORK(&cb->work, vc4_seqno_cb_work); @@ -1083,8 +1104,12 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_wait_seqno *args = data; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, &args->timeout_ns); } @@ -1093,11 +1118,15 @@ int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; struct drm_vc4_wait_bo *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->pad != 0) return -EINVAL; @@ -1144,6 +1173,9 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, args->shader_rec_size, args->bo_handle_count); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n"); return -ENODEV; @@ -1167,6 +1199,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, DRM_ERROR("malloc failure on exec struct\n"); return -ENOMEM; } + exec->dev = vc4; ret = vc4_v3d_pm_get(vc4); if (ret) { @@ -1276,6 +1309,9 @@ int vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + vc4->dma_fence_context = dma_fence_context_alloc(1); INIT_LIST_HEAD(&vc4->bin_job_list); @@ -1321,11 +1357,15 @@ static void vc4_gem_destroy(struct drm_device *dev, void *unused) int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_gem_madvise *args = data; struct drm_gem_object *gem_obj; struct vc4_bo *bo; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + switch (args->madv) { case VC4_MADV_DONTNEED: case VC4_MADV_WILLNEED: diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 4342fb43e8c1..2eacfb6773d2 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -265,6 +265,9 @@ vc4_irq_enable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (!vc4->v3d) return; @@ -279,6 +282,9 @@ vc4_irq_disable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (!vc4->v3d) return; @@ -296,8 +302,12 @@ vc4_irq_disable(struct drm_device *dev) int vc4_irq_install(struct drm_device *dev, int irq) { + struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (irq == IRQ_NOTCONNECTED) return -ENOTCONN; @@ -316,6 +326,9 @@ void vc4_irq_uninstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + vc4_irq_disable(dev); free_irq(vc4->irq, dev); } @@ -326,6 +339,9 @@ void vc4_irq_reset(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + /* Acknowledge any stale IRQs. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 1d3b31fb71ea..893d831b24aa 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -479,8 +479,12 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_mode_fb_cmd2 mode_cmd_local; + if (WARN_ON_ONCE(vc4->is_vc5)) + return ERR_PTR(-ENODEV); + /* If the user didn't specify a modifier, use the * vc4_set_tiling_ioctl() state for the BO. */ diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c index 18abc06335c1..c7f5adb6bcf8 100644 --- a/drivers/gpu/drm/vc4/vc4_perfmon.c +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -17,13 +17,27 @@ void vc4_perfmon_get(struct vc4_perfmon *perfmon) { + struct vc4_dev *vc4 = perfmon->dev; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (perfmon) refcount_inc(&perfmon->refcnt); } void vc4_perfmon_put(struct vc4_perfmon *perfmon) { - if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) + struct vc4_dev *vc4; + + if (!perfmon) + return; + + vc4 = perfmon->dev; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + + if (refcount_dec_and_test(&perfmon->refcnt)) kfree(perfmon); } @@ -32,6 +46,9 @@ void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon) unsigned int i; u32 mask; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon)) return; @@ -49,6 +66,9 @@ void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, { unsigned int i; + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + if (WARN_ON_ONCE(!vc4->active_perfmon || perfmon != vc4->active_perfmon)) return; @@ -64,8 +84,12 @@ void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id) { + struct vc4_dev *vc4 = vc4file->dev; struct vc4_perfmon *perfmon; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + mutex_lock(&vc4file->perfmon.lock); perfmon = idr_find(&vc4file->perfmon.idr, id); vc4_perfmon_get(perfmon); @@ -76,8 +100,14 @@ struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id) void vc4_perfmon_open_file(struct vc4_file *vc4file) { + struct vc4_dev *vc4 = vc4file->dev; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_init(&vc4file->perfmon.lock); idr_init_base(&vc4file->perfmon.idr, VC4_PERFMONID_MIN); + vc4file->dev = vc4; } static int vc4_perfmon_idr_del(int id, void *elem, void *data) @@ -91,6 +121,11 @@ static int vc4_perfmon_idr_del(int id, void *elem, void *data) void vc4_perfmon_close_file(struct vc4_file *vc4file) { + struct vc4_dev *vc4 = vc4file->dev; + + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4file->perfmon.lock); idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); idr_destroy(&vc4file->perfmon.idr); @@ -107,6 +142,9 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, unsigned int i; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Creating perfmon no VC4 V3D probed\n"); return -ENODEV; @@ -127,6 +165,7 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, GFP_KERNEL); if (!perfmon) return -ENOMEM; + perfmon->dev = vc4; for (i = 0; i < req->ncounters; i++) perfmon->events[i] = req->events[i]; @@ -157,6 +196,9 @@ int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, struct drm_vc4_perfmon_destroy *req = data; struct vc4_perfmon *perfmon; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n"); return -ENODEV; @@ -182,6 +224,9 @@ int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct vc4_perfmon *perfmon; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (!vc4->v3d) { DRM_DEBUG("Getting perfmon no VC4 V3D probed\n"); return -ENODEV; diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 3c918eeaf56e..f6b7dc3df08c 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -593,11 +593,15 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_rcl_setup setup = {0}; struct drm_vc4_submit_cl *args = exec->args; bool has_bin = args->bin_cl_size != 0; int ret; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + if (args->min_x_tile > args->max_x_tile || args->min_y_tile > args->max_y_tile) { DRM_DEBUG("Bad render tile set (%d,%d)-(%d,%d)\n", diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7bb3067f8425..cc714dcfe1f2 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -127,6 +127,9 @@ static int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) int vc4_v3d_pm_get(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + mutex_lock(&vc4->power_lock); if (vc4->power_refcount++ == 0) { int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); @@ -145,6 +148,9 @@ vc4_v3d_pm_get(struct vc4_dev *vc4) void vc4_v3d_pm_put(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -172,6 +178,9 @@ int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) uint64_t seqno = 0; struct vc4_exec_info *exec; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + try_again: spin_lock_irqsave(&vc4->job_lock, irqflags); slot = ffs(~vc4->bin_alloc_used); @@ -316,6 +325,9 @@ int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used) { int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + mutex_lock(&vc4->bin_bo_lock); if (used && *used) @@ -348,6 +360,9 @@ static void bin_bo_release(struct kref *ref) void vc4_v3d_bin_bo_put(struct vc4_dev *vc4) { + if (WARN_ON_ONCE(vc4->is_vc5)) + return; + mutex_lock(&vc4->bin_bo_lock); kref_put(&vc4->bin_bo_kref, bin_bo_release); mutex_unlock(&vc4->bin_bo_lock); diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index eec76af49f04..833eb623d545 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -105,9 +105,13 @@ size_is_lt(uint32_t width, uint32_t height, int cpp) struct drm_gem_cma_object * vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) { + struct vc4_dev *vc4 = exec->dev; struct drm_gem_cma_object *obj; struct vc4_bo *bo; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + if (hindex >= exec->bo_count) { DRM_DEBUG("BO index %d greater than BO count %d\n", hindex, exec->bo_count); @@ -160,10 +164,14 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, uint32_t offset, uint8_t tiling_format, uint32_t width, uint32_t height, uint8_t cpp) { + struct vc4_dev *vc4 = exec->dev; uint32_t aligned_width, aligned_height, stride, size; uint32_t utile_w = utile_width(cpp); uint32_t utile_h = utile_height(cpp); + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + /* The shaded vertex format stores signed 12.4 fixed point * (-2048,2047) offsets from the viewport center, so we should * never have a render target larger than 4096. The texture @@ -482,10 +490,14 @@ vc4_validate_bin_cl(struct drm_device *dev, void *unvalidated, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t len = exec->args->bin_cl_size; uint32_t dst_offset = 0; uint32_t src_offset = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + while (src_offset < len) { void *dst_pkt = validated + dst_offset; void *src_pkt = unvalidated + src_offset; @@ -926,9 +938,13 @@ int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec) { + struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t i; int ret = 0; + if (WARN_ON_ONCE(vc4->is_vc5)) + return -ENODEV; + for (i = 0; i < exec->shader_state_count; i++) { ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); if (ret) diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 7cf82b071de2..e315aeb5fef5 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -778,6 +778,7 @@ vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { + struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev); bool found_shader_end = false; int shader_end_ip = 0; uint32_t last_thread_switch_ip = -3; @@ -785,6 +786,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; + if (WARN_ON_ONCE(vc4->is_vc5)) + return NULL; + memset(&validation_state, 0, sizeof(validation_state)); validation_state.shader = shader_obj->vaddr; validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); -- cgit v1.2.3 From 06781a5026350cde699d2d10c9914a25c1524f45 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 14 Jun 2022 10:31:38 +0200 Subject: mtd: rawnand: gpmi: Fix setting busy timeout setting The DEVICE_BUSY_TIMEOUT value is described in the Reference Manual as: | Timeout waiting for NAND Ready/Busy or ATA IRQ. Used in WAIT_FOR_READY | mode. This value is the number of GPMI_CLK cycles multiplied by 4096. So instead of multiplying the value in cycles with 4096, we have to divide it by that value. Use DIV_ROUND_UP to make sure we are on the safe side, especially when the calculated value in cycles is smaller than 4096 as typically the case. This bug likely never triggered because any timeout != 0 usually will do. In my case the busy timeout in cycles was originally calculated as 2408, which multiplied with 4096 is 0x968000. The lower 16 bits were taken for the 16 bit wide register field, so the register value was 0x8000. With 2970bf5a32f0 ("mtd: rawnand: gpmi: fix controller timings setting") however the value in cycles became 2384, which multiplied with 4096 is 0x950000. The lower 16 bit are 0x0 now resulting in an intermediate timeout when reading from NAND. Fixes: b1206122069aa ("mtd: rawnand: gpmi: use core timings instead of an empirical derivation") Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220614083138.3455683-1-s.hauer@pengutronix.de --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 0b68d05846e1..889e40329956 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -890,7 +890,7 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | BF_GPMI_TIMING0_DATA_SETUP(data_setup_cycles); - hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(busy_timeout_cycles * 4096); + hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(DIV_ROUND_UP(busy_timeout_cycles, 4096)); /* * Derive NFC ideal delay from {3}: -- cgit v1.2.3 From 042999388ef3dba43e813fdc6d6133ec9ca405dc Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 2 Jun 2022 14:21:16 +0800 Subject: mm/page_isolation.c: fix one kernel-doc comment Remove one warning found by running scripts/kernel-doc, which is caused by using 'make W=1': mm/page_isolation.c:304: warning: Function parameter or member 'skip_isolation' not described in 'isolate_single_pageblock' Link: https://lkml.kernel.org/r/20220602062116.61199-1-yang.lee@linux.alibaba.com Signed-off-by: Yang Li Reported-by: Abaci Robot Signed-off-by: Andrew Morton --- mm/page_isolation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index d200d41ad0d3..9d73dc38e3d7 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -286,6 +286,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * @flags: isolation flags * @gfp_flags: GFP flags used for migrating pages * @isolate_before: isolate the pageblock before the boundary_pfn + * @skip_isolation: the flag to skip the pageblock isolation in second + * isolate_single_pageblock() * * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one * pageblock. When not all pageblocks within a page are isolated at the same -- cgit v1.2.3 From 31733463372e8d88ea54bfa1e35178aad9b2ffd2 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 30 May 2022 12:51:56 -0300 Subject: mm: lru_cache_disable: use synchronize_rcu_expedited commit ff042f4a9b050 ("mm: lru_cache_disable: replace work queue synchronization with synchronize_rcu") replaced lru_cache_disable's usage of work queues with synchronize_rcu. Some users reported large performance regressions due to this commit, for example: https://lore.kernel.org/all/20220521234616.GO1790663@paulmck-ThinkPad-P17-Gen-1/T/ Switching to synchronize_rcu_expedited fixes the problem. Link: https://lkml.kernel.org/r/YpToHCmnx/HEcVyR@fuller.cnet Fixes: ff042f4a9b050 ("mm: lru_cache_disable: replace work queue synchronization with synchronize_rcu") Signed-off-by: Marcelo Tosatti Tested-by: Stefan Wahren Tested-by: Michael Larabel Cc: Sebastian Andrzej Siewior Cc: Nicolas Saenz Julienne Cc: Borislav Petkov Cc: Minchan Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Juri Lelli Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: Phil Elwell Cc: Signed-off-by: Andrew Morton --- mm/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index f3922a96b2e9..034bb24879a3 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -881,7 +881,7 @@ void lru_cache_disable(void) * lru_disable_count = 0 will have exited the critical * section when synchronize_rcu() returns. */ - synchronize_rcu(); + synchronize_rcu_expedited(); #ifdef CONFIG_SMP __lru_add_drain_all(true); #else -- cgit v1.2.3 From d25c83c6606ffc3abdf0868136ad3399f648ad70 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 15 Mar 2022 11:24:44 +0100 Subject: kthread: make it clear that kthread_create_on_node() might be terminated by any fatal signal The comments in kernel/kthread.c create a feeling that only SIGKILL is able to terminate the creation of kernel kthreads by kthread_create()/_on_node()/_on_cpu() APIs. In reality, wait_for_completion_killable() might be killed by any fatal signal that does not have a custom handler: (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) static inline void signal_wake_up(struct task_struct *t, bool resume) { signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) { [...] /* * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) ...) { if (!sig_kernel_coredump(sig)) { [...] do { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); return; } } } Update the comments in kernel/kthread.c to make this more obvious. The motivation for this change was debugging why a module initialization failed. The module was being loaded from initrd. It "magically" failed when systemd was switching to the real root. The clean up operations sent SIGTERM to various pending processed that were started from initrd. Link: https://lkml.kernel.org/r/20220315102444.2380-1-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: "Eric W. Biederman" Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Kees Cook Cc: Marco Elver Cc: Jens Axboe Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/kthread.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 544fd4097406..3c677918d8f2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -340,7 +340,7 @@ static int kthread(void *_create) self = to_kthread(current); - /* If user was SIGKILLed, I release the structure. */ + /* Release the structure when caller killed by a fatal signal. */ done = xchg(&create->done, NULL); if (!done) { kfree(create); @@ -398,7 +398,7 @@ static void create_kthread(struct kthread_create_info *create) /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { - /* If user was SIGKILLed, I release the structure. */ + /* Release the structure when caller killed by a fatal signal. */ struct completion *done = xchg(&create->done, NULL); if (!done) { @@ -440,9 +440,9 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), */ if (unlikely(wait_for_completion_killable(&done))) { /* - * If I was SIGKILLed before kthreadd (or new kernel thread) - * calls complete(), leave the cleanup of this structure to - * that thread. + * If I was killed by a fatal signal before kthreadd (or new + * kernel thread) calls complete(), leave the cleanup of this + * structure to that thread. */ if (xchg(&create->done, NULL)) return ERR_PTR(-EINTR); @@ -876,7 +876,7 @@ fail_task: * * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker(unsigned int flags, const char namefmt[], ...) @@ -925,7 +925,7 @@ EXPORT_SYMBOL(kthread_create_worker); * Return: * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, -- cgit v1.2.3 From 2949282938135ab734c3829495ae393523ceb702 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 4 Jun 2022 19:50:51 +0000 Subject: mm/damon/reclaim: schedule 'damon_reclaim_timer' only after 'system_wq' is initialized Commit 059342d1dd4e ("mm/damon/reclaim: fix the timer always stays active") made DAMON_RECLAIM's 'enabled' parameter store callback, 'enabled_store()', to schedule 'damon_reclaim_timer'. The scheduling uses 'system_wq', which is initialized in 'workqueue_init_early()'. As kernel parameters parsing function ('parse_args()') is called before 'workqueue_init_early()', 'enabled_store()' can be executed before 'workqueue_init_early()' and end up accessing the uninitialized 'system_wq'. As a result, the booting hang[1]. This commit fixes the issue by checking if the initialization is done before scheduling the timer. [1] https://lkml.kernel.org/20220604192222.1488-1-sj@kernel.org/ Link: https://lkml.kernel.org/r/20220604195051.1589-1-sj@kernel.org Fixes: 059342d1dd4e ("mm/damon/reclaim: fix the timer always stays active") Signed-off-by: SeongJae Park Reported-by: Greg White Cc: Hailong Tu Signed-off-by: Andrew Morton --- mm/damon/reclaim.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 8efbfb24f3a1..4b07c29effe9 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -374,6 +374,8 @@ static void damon_reclaim_timer_fn(struct work_struct *work) } static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); +static bool damon_reclaim_initialized; + static int enabled_store(const char *val, const struct kernel_param *kp) { @@ -382,6 +384,10 @@ static int enabled_store(const char *val, if (rc < 0) return rc; + /* system_wq might not initialized yet */ + if (!damon_reclaim_initialized) + return rc; + if (enabled) schedule_delayed_work(&damon_reclaim_timer, 0); @@ -449,6 +455,8 @@ static int __init damon_reclaim_init(void) damon_add_target(ctx, target); schedule_delayed_work(&damon_reclaim_timer, 0); + + damon_reclaim_initialized = true; return 0; } -- cgit v1.2.3 From 515e1d86c982b169e77cfe245994d2a60fc0d012 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 7 Jun 2022 19:41:39 +0300 Subject: mailmap: add alias for jarkko@profian.com Add alias for patches that I contribute on behalf of Profian (my current employer). Link: https://lkml.kernel.org/r/20220607164140.1230876-1-jarkko@kernel.org Signed-off-by: Jarkko Sakkinen Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 825fae8e6b7b..b2967aab5359 100644 --- a/.mailmap +++ b/.mailmap @@ -165,6 +165,7 @@ Jan Glauber Jan Glauber Jan Glauber Jarkko Sakkinen +Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe -- cgit v1.2.3 From 6901c0b6df157a88721e5b71f85af4c684877949 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 7 Jun 2022 22:51:35 +0800 Subject: MAINTAINERS: add Miaohe Lin as a memory-failure reviewer I have been focusing on mm for the past two years. e.g. fixing bugs, cleaning up the code and reviewing. I would like to help maintainers and people working on memory-failure by reviewing their work. Let me be Cc'd on patches related to memory-failure. Link: https://lkml.kernel.org/r/20220607145135.38670-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1fc9ead83d2a..96db6b61951a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9132,6 +9132,7 @@ F: drivers/media/platform/st/sti/hva HWPOISON MEMORY FAILURE HANDLING M: Naoya Horiguchi +R: Miaohe Lin L: linux-mm@kvack.org S: Maintained F: mm/hwpoison-inject.c -- cgit v1.2.3 From 7757e7627a05c01d137a7fb87ac9d1533f460d33 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 10 Jun 2022 12:12:58 +0200 Subject: MAINTAINERS: add MEMORY HOT(UN)PLUG section and add David as reviewer There are certainly a lot more files that partially fall into the memory hot(un)plug category, including parts of mm/sparse.c, mm/page_isolation.c and mm/page_alloc.c. Let's only add what's almost completely memory hot(un)plug related. Add myself as reviewer so it's easier for contributors to figure out whom to CC. Link: https://lkml.kernel.org/r/20220610101258.75738-1-david@redhat.com Link: https://lkml.kernel.org/r/YqlaE/LYHwB0gpaW@localhost.localdomain Signed-off-by: David Hildenbrand Acked-by: Muchun Song Acked-by: Greg Kroah-Hartman Cc: Miaohe Lin Signed-off-by: Andrew Morton --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96db6b61951a..59fbe15d469b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12858,6 +12858,18 @@ F: include/linux/vmalloc.h F: mm/ F: tools/testing/selftests/vm/ +MEMORY HOT(UN)PLUG +M: David Hildenbrand +M: Oscar Salvador +L: linux-mm@kvack.org +S: Maintained +F: Documentation/admin-guide/mm/memory-hotplug.rst +F: Documentation/core-api/memory-hotplug.rst +F: drivers/base/memory.c +F: include/linux/memory_hotplug.h +F: mm/memory_hotplug.c +F: tools/testing/selftests/memory-hotplug/ + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger -- cgit v1.2.3 From 8585c3971df4bc3b909b5e7e6c7656f379d2642d Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Sat, 11 Jun 2022 12:31:42 +0300 Subject: MAINTAINERS: update Abel Vesa's email Use Abel Vesa's kernel.org account in maintainer entry and mailmap. Link: https://lkml.kernel.org/r/20220611093142.202271-1-abelvesa@kernel.org Signed-off-by: Abel Vesa Cc: Stephen Boyd Cc: Dong Aisheng Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index b2967aab5359..dda0030573ca 100644 --- a/.mailmap +++ b/.mailmap @@ -10,6 +10,8 @@ # Please keep this list dictionary sorted. # Aaron Durbin +Abel Vesa +Abel Vesa Abhinav Kumar Adam Oldham Adam Radford diff --git a/MAINTAINERS b/MAINTAINERS index 59fbe15d469b..3dfb95897e16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14274,7 +14274,7 @@ F: drivers/iio/gyro/fxas21002c_i2c.c F: drivers/iio/gyro/fxas21002c_spi.c NXP i.MX CLOCK DRIVERS -M: Abel Vesa +M: Abel Vesa L: linux-clk@vger.kernel.org L: linux-imx@nxp.com S: Maintained -- cgit v1.2.3 From f0a7d33a7184df3193e4bd9ef9283a0a92bed4a6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 15 Jun 2022 14:22:44 -0700 Subject: MAINTAINERS: update MM tree references Describe the new kernel.org location of the MM trees. Suggested-by: David Hildenbrand Cc: Muchun Song Cc: Greg Kroah-Hartman Cc: Miaohe Lin Signed-off-by: Andrew Morton --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3dfb95897e16..f3be1b26eecf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12846,9 +12846,8 @@ M: Andrew Morton L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org -T: quilt https://ozlabs.org/~akpm/mmotm/ -T: quilt https://ozlabs.org/~akpm/mmots/ -T: git git://github.com/hnaz/linux-mm.git +T: git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new F: include/linux/gfp.h F: include/linux/memory_hotplug.h F: include/linux/mm.h -- cgit v1.2.3 From 8a6f62a26d1e4e6835fbd4591c2bedcfcceadb1d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Jun 2022 20:14:56 +0800 Subject: MAINTAINERS: add maillist information for LoongArch Now there is a dedicated maillist (loongarch@lists.linux.dev) for LoongArch, add it for better collaboration. Link: https://lkml.kernel.org/r/20220616121456.3613470-1-chenhuacai@loongson.cn Signed-off-by: Huacai Chen Reviewed-by: WANG Xuerui Cc: Huacai Chen Cc: Arnd Bergmann Cc: Xuefeng Li Cc: Guo Ren Cc: Xuerui Wang Cc: Jiaxun Yang Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f3be1b26eecf..95b44367f0ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11591,6 +11591,7 @@ F: drivers/gpu/drm/bridge/lontium-lt8912b.c LOONGARCH M: Huacai Chen R: WANG Xuerui +L: loongarch@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git F: arch/loongarch/ -- cgit v1.2.3 From 327b18b7aaed5de3b548212e3ab75133bf323759 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 9 Jun 2022 14:33:19 +0200 Subject: mm/kfence: select random number before taking raw lock The RNG uses vanilla spinlocks, not raw spinlocks, so kfence should pick its random numbers before taking its raw spinlocks. This also has the nice effect of doing less work inside the lock. It should fix a splat that Geert saw with CONFIG_PROVE_RAW_LOCK_NESTING: dump_backtrace.part.0+0x98/0xc0 show_stack+0x14/0x28 dump_stack_lvl+0xac/0xec dump_stack+0x14/0x2c __lock_acquire+0x388/0x10a0 lock_acquire+0x190/0x2c0 _raw_spin_lock_irqsave+0x6c/0x94 crng_make_state+0x148/0x1e4 _get_random_bytes.part.0+0x4c/0xe8 get_random_u32+0x4c/0x140 __kfence_alloc+0x460/0x5c4 kmem_cache_alloc_trace+0x194/0x1dc __kthread_create_on_node+0x5c/0x1a8 kthread_create_on_node+0x58/0x7c printk_start_kthread.part.0+0x34/0xa8 printk_activate_kthreads+0x4c/0x54 do_one_initcall+0xec/0x278 kernel_init_freeable+0x11c/0x214 kernel_init+0x24/0x124 ret_from_fork+0x10/0x20 Link: https://lkml.kernel.org/r/20220609123319.17576-1-Jason@zx2c4.com Fixes: d4150779e60f ("random32: use real rng for non-deterministic randomness") Signed-off-by: Jason A. Donenfeld Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Reviewed-by: Marco Elver Reviewed-by: Petr Mladek Cc: John Ogness Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kfence/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 4e7cd4c8e687..4b5e5a3d3a63 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -360,6 +360,9 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g unsigned long flags; struct slab *slab; void *addr; + const bool random_right_allocate = prandom_u32_max(2); + const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS && + !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS); /* Try to obtain a free object. */ raw_spin_lock_irqsave(&kfence_freelist_lock, flags); @@ -404,7 +407,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g * is that the out-of-bounds accesses detected are deterministic for * such allocations. */ - if (prandom_u32_max(2)) { + if (random_right_allocate) { /* Allocate on the "right" side, re-calculate address. */ meta->addr += PAGE_SIZE - size; meta->addr = ALIGN_DOWN(meta->addr, cache->align); @@ -444,7 +447,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g if (cache->ctor) cache->ctor(addr); - if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS)) + if (random_fault) kfence_protect(meta->addr); /* Random "faults" by protecting the object. */ atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]); -- cgit v1.2.3 From 034e5afad921f1c08c001bf147fb1ba76ae33498 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 10 Jun 2022 16:35:13 -0600 Subject: mm: re-allow pinning of zero pfns The commit referenced below subtly and inadvertently changed the logic to disallow pinning of zero pfns. This breaks device assignment with vfio and potentially various other users of gup. Exclude the zero page test from the negation. Link: https://lkml.kernel.org/r/165490039431.944052.12458624139225785964.stgit@omen Fixes: 1c563432588d ("mm: fix is_pinnable_page against a cma page") Signed-off-by: Alex Williamson Acked-by: Minchan Kim Acked-by: David Hildenbrand Reported-by: Yishai Hadas Cc: Paul E. McKenney Cc: John Hubbard Cc: John Dias Cc: Jason Gunthorpe Cc: Zhangfei Gao Cc: Matthew Wilcox Cc: Joao Martins Cc: Yi Liu Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index bc8f326be0ce..781fae17177d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1600,7 +1600,7 @@ static inline bool is_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page))); + return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); } #else static inline bool is_pinnable_page(struct page *page) -- cgit v1.2.3 From df4ae285a3d5ce99d69efe81b21c4fed9bbc51b9 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 10 Jun 2022 02:44:52 +0000 Subject: mm: memcontrol: reference to tools/cgroup/memcg_slabinfo.py There is no slabinfo.py in tools/cgroup, but has memcg_slabinfo.py instead. Link: https://lkml.kernel.org/r/20220610024451.744135-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reviewed-by: Muchun Song Acked-by: Roman Gushchin Signed-off-by: Andrew Morton --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index abec50f31fe6..618c366a2f07 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4859,7 +4859,7 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) { /* * Deprecated. - * Please, take a look at tools/cgroup/slabinfo.py . + * Please, take a look at tools/cgroup/memcg_slabinfo.py . */ return 0; } -- cgit v1.2.3 From 68d32527d340b0d13c8cf6495d6ab4332adca09a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 13 Jun 2022 13:36:48 -0700 Subject: hugetlbfs: zero partial pages during fallocate hole punch hugetlbfs fallocate support was originally added with commit 70c3547e36f5 ("hugetlbfs: add hugetlbfs_fallocate()"). Initial support only operated on whole hugetlb pages. This makes sense for populating files as other interfaces such as mmap and truncate require hugetlb page size alignment. Only operating on whole hugetlb pages for the hole punch case was a simplification and there was no compelling use case to zero partial pages. In a recent discussion[1] it was assumed that hugetlbfs hole punch would zero partial hugetlb pages as that is in line with the man page description saying 'partial filesystem blocks are zeroed'. However, the hugetlbfs hole punch code actually does this: hole_start = round_up(offset, hpage_size); hole_end = round_down(offset + len, hpage_size); Modify code to zero partial hugetlb pages in hole punch range. It is possible that application code could note a change in behavior. However, that would imply the code is passing in an unaligned range and expecting only whole pages be removed. This is unlikely as the fallocate documentation states the opposite. The current hugetlbfs fallocate hole punch behavior is tested with the libhugetlbfs test fallocate_align[2]. This test will be updated to validate partial page zeroing. [1] https://lore.kernel.org/linux-mm/20571829-9d3d-0b48-817c-b6b15565f651@redhat.com/ [2] https://github.com/libhugetlbfs/libhugetlbfs/blob/master/tests/fallocate_align.c Link: https://lkml.kernel.org/r/YqeiMlZDKI1Kabfe@monkey Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Naoya Horiguchi Cc: Axel Rasmussen Cc: Dave Hansen Cc: Michal Hocko Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 62408047e8d7..02eb72351b15 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -600,41 +600,79 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset) remove_inode_hugepages(inode, offset, LLONG_MAX); } +static void hugetlbfs_zero_partial_page(struct hstate *h, + struct address_space *mapping, + loff_t start, + loff_t end) +{ + pgoff_t idx = start >> huge_page_shift(h); + struct folio *folio; + + folio = filemap_lock_folio(mapping, idx); + if (!folio) + return; + + start = start & ~huge_page_mask(h); + end = end & ~huge_page_mask(h); + if (!end) + end = huge_page_size(h); + + folio_zero_segment(folio, (size_t)start, (size_t)end); + + folio_unlock(folio); + folio_put(folio); +} + static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) { + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_inode(inode); loff_t hpage_size = huge_page_size(h); loff_t hole_start, hole_end; /* - * For hole punch round up the beginning offset of the hole and - * round down the end. + * hole_start and hole_end indicate the full pages within the hole. */ hole_start = round_up(offset, hpage_size); hole_end = round_down(offset + len, hpage_size); - if (hole_end > hole_start) { - struct address_space *mapping = inode->i_mapping; - struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + inode_lock(inode); - inode_lock(inode); + /* protected by i_rwsem */ + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { + inode_unlock(inode); + return -EPERM; + } - /* protected by i_rwsem */ - if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { - inode_unlock(inode); - return -EPERM; - } + i_mmap_lock_write(mapping); + + /* If range starts before first full page, zero partial page. */ + if (offset < hole_start) + hugetlbfs_zero_partial_page(h, mapping, + offset, min(offset + len, hole_start)); - i_mmap_lock_write(mapping); + /* Unmap users of full pages in the hole. */ + if (hole_end > hole_start) { if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT, 0); - i_mmap_unlock_write(mapping); - remove_inode_hugepages(inode, hole_start, hole_end); - inode_unlock(inode); } + /* If range extends beyond last full page, zero partial page. */ + if ((offset + len) > hole_end && (offset + len) > hole_start) + hugetlbfs_zero_partial_page(h, mapping, + hole_end, offset + len); + + i_mmap_unlock_write(mapping); + + /* Remove full pages from the file. */ + if (hole_end > hole_start) + remove_inode_hugepages(inode, hole_start, hole_end); + + inode_unlock(inode); + return 0; } -- cgit v1.2.3 From 67f22ba7750f940bcd7e1b12720896c505c2d63f Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 15 Jun 2022 17:32:09 +0800 Subject: mm/memory-failure: disable unpoison once hw error happens Currently unpoison_memory(unsigned long pfn) is designed for soft poison(hwpoison-inject) only. Since 17fae1294ad9d, the KPTE gets cleared on a x86 platform once hardware memory corrupts. Unpoisoning a hardware corrupted page puts page back buddy only, the kernel has a chance to access the page with *NOT PRESENT* KPTE. This leads BUG during accessing on the corrupted KPTE. Suggested by David&Naoya, disable unpoison mechanism when a real HW error happens to avoid BUG like this: Unpoison: Software-unpoisoned page 0x61234 BUG: unable to handle page fault for address: ffff888061234000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 2c01067 P4D 2c01067 PUD 107267063 PMD 10382b063 PTE 800fffff9edcb062 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 26551 Comm: stress Kdump: loaded Tainted: G M OE 5.18.0.bm.1-amd64 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ... RIP: 0010:clear_page_erms+0x7/0x10 Code: ... RSP: 0000:ffffc90001107bc8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000901 RCX: 0000000000001000 RDX: ffffea0001848d00 RSI: ffffea0001848d40 RDI: ffff888061234000 RBP: ffffea0001848d00 R08: 0000000000000901 R09: 0000000000001276 R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001 R13: 0000000000000000 R14: 0000000000140dca R15: 0000000000000001 FS: 00007fd8b2333740(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff888061234000 CR3: 00000001023d2005 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: prep_new_page+0x151/0x170 get_page_from_freelist+0xca0/0xe20 ? sysvec_apic_timer_interrupt+0xab/0xc0 ? asm_sysvec_apic_timer_interrupt+0x1b/0x20 __alloc_pages+0x17e/0x340 __folio_alloc+0x17/0x40 vma_alloc_folio+0x84/0x280 __handle_mm_fault+0x8d4/0xeb0 handle_mm_fault+0xd5/0x2a0 do_user_addr_fault+0x1d0/0x680 ? kvm_read_and_reset_apf_flags+0x3b/0x50 exc_page_fault+0x78/0x170 asm_exc_page_fault+0x27/0x30 Link: https://lkml.kernel.org/r/20220615093209.259374-2-pizhenwei@bytedance.com Fixes: 847ce401df392 ("HWPOISON: Add unpoisoning support") Fixes: 17fae1294ad9d ("x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned") Signed-off-by: zhenwei pi Acked-by: David Hildenbrand Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Reviewed-by: Oscar Salvador Cc: Greg Kroah-Hartman Cc: [5.8+] Signed-off-by: Andrew Morton --- Documentation/vm/hwpoison.rst | 3 ++- drivers/base/memory.c | 2 +- include/linux/mm.h | 1 + mm/hwpoison-inject.c | 2 +- mm/madvise.c | 2 +- mm/memory-failure.c | 12 ++++++++++++ 6 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst index c742de1769d1..b9d5253c1305 100644 --- a/Documentation/vm/hwpoison.rst +++ b/Documentation/vm/hwpoison.rst @@ -120,7 +120,8 @@ Testing unpoison-pfn Software-unpoison page at PFN echoed into this file. This way a page can be reused again. This only works for Linux - injected failures, not for real memory failures. + injected failures, not for real memory failures. Once any hardware + memory failure happens, this feature is disabled. Note these injection interfaces are not stable and might change between kernel versions diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 084d67fd55cc..bc60c9cd3230 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -558,7 +558,7 @@ static ssize_t hard_offline_page_store(struct device *dev, if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - ret = memory_failure(pfn, 0); + ret = memory_failure(pfn, MF_SW_SIMULATED); if (ret == -EOPNOTSUPP) ret = 0; return ret ? ret : count; diff --git a/include/linux/mm.h b/include/linux/mm.h index 781fae17177d..cf3d0d673f6b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3232,6 +3232,7 @@ enum mf_flags { MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, MF_UNPOISON = 1 << 4, + MF_SW_SIMULATED = 1 << 5, }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 5c0cddd81505..65e242b5a432 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -48,7 +48,7 @@ static int hwpoison_inject(void *data, u64 val) inject: pr_info("Injecting memory failure at pfn %#lx\n", pfn); - err = memory_failure(pfn, 0); + err = memory_failure(pfn, MF_SW_SIMULATED); return (err == -EOPNOTSUPP) ? 0 : err; } diff --git a/mm/madvise.c b/mm/madvise.c index d7b4f2602949..0316bbc6441b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1112,7 +1112,7 @@ static int madvise_inject_error(int behavior, } else { pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", pfn, start); - ret = memory_failure(pfn, MF_COUNT_INCREASED); + ret = memory_failure(pfn, MF_COUNT_INCREASED | MF_SW_SIMULATED); if (ret == -EOPNOTSUPP) ret = 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b85661cbdc4a..da39ec8afca8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -69,6 +69,8 @@ int sysctl_memory_failure_recovery __read_mostly = 1; atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); +static bool hw_memory_failure __read_mostly = false; + static bool __page_handle_poison(struct page *page) { int ret; @@ -1768,6 +1770,9 @@ int memory_failure(unsigned long pfn, int flags) mutex_lock(&mf_mutex); + if (!(flags & MF_SW_SIMULATED)) + hw_memory_failure = true; + p = pfn_to_online_page(pfn); if (!p) { res = arch_memory_failure(pfn, flags); @@ -2103,6 +2108,13 @@ int unpoison_memory(unsigned long pfn) mutex_lock(&mf_mutex); + if (hw_memory_failure) { + unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n", + pfn, &unpoison_rs); + ret = -EOPNOTSUPP; + goto unlock_mutex; + } + if (!PageHWPoison(p)) { unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", pfn, &unpoison_rs); -- cgit v1.2.3 From e67679cc4264cf9b318af4e8616eaa2a7565db1f Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 16 Jun 2022 00:50:12 +0200 Subject: mailmap: add entry for Christian Marangi Add entry to map ansuelsmth@gmail.com to the unique identity of Christian Marangi. Link: https://lkml.kernel.org/r/20220615225012.18782-1-ansuelsmth@gmail.com Signed-off-by: Christian Marangi Cc: Jens Axboe Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index dda0030573ca..2ed1cf869175 100644 --- a/.mailmap +++ b/.mailmap @@ -87,6 +87,7 @@ Christian Borntraeger Christian Brauner Christian Brauner Christian Brauner +Christian Marangi Christophe Ricard Christoph Hellwig Colin Ian King -- cgit v1.2.3 From 540a92bfe6dab7310b9df2e488ba247d784d0163 Mon Sep 17 00:00:00 2001 From: Edward Wu Date: Fri, 17 Jun 2022 11:32:20 +0800 Subject: ata: libata: add qc->flags in ata_qc_complete_template tracepoint Add flags value to check the result of ata completion Fixes: 255c03d15a29 ("libata: Add tracepoints") Cc: stable@vger.kernel.org Signed-off-by: Edward Wu Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index d4e631aa976f..6025dd8ba4aa 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -288,6 +288,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; + __entry->flags = qc->flags; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ -- cgit v1.2.3 From 56961c6331463cce2d84d0f973177a517fb33a82 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 16 Jun 2022 16:11:34 +0000 Subject: KVM: arm64: Prevent kmemleak from accessing pKVM memory Commit a7259df76702 ("memblock: make memblock_find_in_range method private") changed the API using which memory is reserved for the pKVM hypervisor. However, memblock_phys_alloc() differs from the original API in terms of kmemleak semantics -- the old one didn't report the reserved regions to kmemleak while the new one does. Unfortunately, when protected KVM is enabled, all kernel accesses to pKVM-private memory result in a fatal exception, which can now happen because of kmemleak scans: $ echo scan > /sys/kernel/debug/kmemleak [ 34.991354] kvm [304]: nVHE hyp BUG at: [] __kvm_nvhe_handle_host_mem_abort+0x270/0x290! [ 34.991580] kvm [304]: Hyp Offset: 0xfffe8be807e00000 [ 34.991813] Kernel panic - not syncing: HYP panic: [ 34.991813] PS:600003c9 PC:0000f418011a3750 ESR:00000000f2000800 [ 34.991813] FAR:ffff000439200000 HPFAR:0000000004792000 PAR:0000000000000000 [ 34.991813] VCPU:0000000000000000 [ 34.993660] CPU: 0 PID: 304 Comm: bash Not tainted 5.19.0-rc2 #102 [ 34.994059] Hardware name: linux,dummy-virt (DT) [ 34.994452] Call trace: [ 34.994641] dump_backtrace.part.0+0xcc/0xe0 [ 34.994932] show_stack+0x18/0x6c [ 34.995094] dump_stack_lvl+0x68/0x84 [ 34.995276] dump_stack+0x18/0x34 [ 34.995484] panic+0x16c/0x354 [ 34.995673] __hyp_pgtable_total_pages+0x0/0x60 [ 34.995933] scan_block+0x74/0x12c [ 34.996129] scan_gray_list+0xd8/0x19c [ 34.996332] kmemleak_scan+0x2c8/0x580 [ 34.996535] kmemleak_write+0x340/0x4a0 [ 34.996744] full_proxy_write+0x60/0xbc [ 34.996967] vfs_write+0xc4/0x2b0 [ 34.997136] ksys_write+0x68/0xf4 [ 34.997311] __arm64_sys_write+0x20/0x2c [ 34.997532] invoke_syscall+0x48/0x114 [ 34.997779] el0_svc_common.constprop.0+0x44/0xec [ 34.998029] do_el0_svc+0x2c/0xc0 [ 34.998205] el0_svc+0x2c/0x84 [ 34.998421] el0t_64_sync_handler+0xf4/0x100 [ 34.998653] el0t_64_sync+0x18c/0x190 [ 34.999252] SMP: stopping secondary CPUs [ 35.000034] Kernel Offset: disabled [ 35.000261] CPU features: 0x800,00007831,00001086 [ 35.000642] Memory Limit: none [ 35.001329] ---[ end Kernel panic - not syncing: HYP panic: [ 35.001329] PS:600003c9 PC:0000f418011a3750 ESR:00000000f2000800 [ 35.001329] FAR:ffff000439200000 HPFAR:0000000004792000 PAR:0000000000000000 [ 35.001329] VCPU:0000000000000000 ]--- Fix this by explicitly excluding the hypervisor's memory pool from kmemleak like we already do for the hyp BSS. Cc: Mike Rapoport Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Signed-off-by: Quentin Perret Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220616161135.3997786-1-qperret@google.com --- arch/arm64/kvm/arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a0188144a122..83a7f61354d3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2112,11 +2112,11 @@ static int finalize_hyp_mode(void) return 0; /* - * Exclude HYP BSS from kmemleak so that it doesn't get peeked - * at, which would end badly once the section is inaccessible. - * None of other sections should ever be introspected. + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); return pkvm_drop_host_privileges(); } -- cgit v1.2.3 From cbc6d44867a24130ee528c20cffcbc28b3e09693 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Jun 2022 09:53:18 +0100 Subject: KVM: arm64: Add Oliver as a reviewer Oliver Upton has agreed to help with reviewing the KVM/arm64 patches, and has been doing so for a while now, so adding him as to the reviewer list. Note that Oliver is using a different email address for this purpose, rather than the one his been using for his other contributions. Signed-off-by: Marc Zyngier Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20220616085318.1303657-1-maz@kernel.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a6d3bd9d2a8d..7192d1277558 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10821,6 +10821,7 @@ M: Marc Zyngier R: James Morse R: Alexandru Elisei R: Suzuki K Poulose +R: Oliver Upton L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers) S: Maintained -- cgit v1.2.3 From e83031564137cf37e07c2d10ad468046ff48a0cf Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 18 May 2022 11:45:29 -0700 Subject: riscv: Fix ALT_THEAD_PMA's asm parameters After commit a35707c3d850 ("riscv: add memory-type errata for T-Head"), builds with LLVM's integrated assembler fail like: In file included from arch/riscv/kernel/asm-offsets.c:10: In file included from ./include/linux/mm.h:29: In file included from ./include/linux/pgtable.h:6: In file included from ./arch/riscv/include/asm/pgtable.h:114: ./arch/riscv/include/asm/pgtable-64.h:210:2: error: invalid input constraint '0' in asm ALT_THEAD_PMA(prot_val); ^ ./arch/riscv/include/asm/errata_list.h:88:4: note: expanded from macro 'ALT_THEAD_PMA' : "0"(_val), \ ^ This was reported upstream to LLVM where Jessica pointed out a couple of issues with the existing implementation of ALT_THEAD_PMA: * t3 is modified but not listed in the clobbers list. * "+r"(_val) marks _val as both an input and output of the asm but then "0"(_val) marks _val as an input matching constraint, which does not make much sense in this situation, as %1 is not actually used in the asm and matching constraints are designed to be used for different inputs that need to use the same register. Drop the matching contraint and shift all the operands by one, as %1 is unused, and mark t3 as clobbered. This resolves the build error and goes not cause any problems with GNU as. Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head") Link: https://github.com/ClangBuiltLinux/linux/issues/1641 Link: https://github.com/llvm/llvm-project/issues/55514 Link: https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html Suggested-by: Jessica Clarke Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220518184529.454008-1-nathan@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 9e2888dbb5b1..416ead0f9a65 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -75,20 +75,20 @@ asm volatile(ALTERNATIVE( \ "nop\n\t" \ "nop\n\t" \ "nop", \ - "li t3, %2\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %1\n\t" \ + "slli t3, t3, %3\n\t" \ "and t3, %0, t3\n\t" \ "bne t3, zero, 2f\n\t" \ - "li t3, %3\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %2\n\t" \ + "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ : "+r"(_val) \ - : "0"(_val), \ - "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_PBMT_SHIFT) \ + : "t3") #else #define ALT_THEAD_PMA(_val) #endif -- cgit v1.2.3 From 50e34d78815e474d410f342fbe783b18192ca518 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:24 +0200 Subject: block: disable the elevator int del_gendisk The elevator is only used for file system requests, which are stopped in del_gendisk. Move disabling the elevator and freeing the scheduler tags to the end of del_gendisk instead of doing that work in disk_release and blk_cleanup_queue to avoid a use after free on q->tag_set from disk_release as the tag_set might not be alive at that point. Move the blk_qos_exit call as well, as it just depends on the elevator exit and would be the only reason to keep the not exactly cheap queue freeze in disk_release. Fixes: e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support") Reported-by: syzbot+3e3f419f4a7816471838@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Tested-by: syzbot+3e3f419f4a7816471838@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220614074827.458955-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 13 ------------- block/genhd.c | 39 +++++++++++---------------------------- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 06ff5bbfe8f6..27fb1357ad4b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -322,19 +322,6 @@ void blk_cleanup_queue(struct request_queue *q) blk_mq_exit_queue(q); } - /* - * In theory, request pool of sched_tags belongs to request queue. - * However, the current implementation requires tag_set for freeing - * requests, so free the pool now. - * - * Queue has become frozen, there can't be any in-queue requests, so - * it is safe to free requests now. - */ - mutex_lock(&q->sysfs_lock); - if (q->elevator) - blk_mq_sched_free_rqs(q); - mutex_unlock(&q->sysfs_lock); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/genhd.c b/block/genhd.c index 27205ae47d59..e0675772178b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -652,6 +652,17 @@ void del_gendisk(struct gendisk *disk) blk_sync_queue(q); blk_flush_integrity(); + blk_mq_cancel_work_sync(q); + + blk_mq_quiesce_queue(q); + if (q->elevator) { + mutex_lock(&q->sysfs_lock); + elevator_exit(q); + mutex_unlock(&q->sysfs_lock); + } + rq_qos_exit(q); + blk_mq_unquiesce_queue(q); + /* * Allow using passthrough request again after the queue is torn down. */ @@ -1120,31 +1131,6 @@ static const struct attribute_group *disk_attr_groups[] = { NULL }; -static void disk_release_mq(struct request_queue *q) -{ - blk_mq_cancel_work_sync(q); - - /* - * There can't be any non non-passthrough bios in flight here, but - * requests stay around longer, including passthrough ones so we - * still need to freeze the queue here. - */ - blk_mq_freeze_queue(q); - - /* - * Since the I/O scheduler exit code may access cgroup information, - * perform I/O scheduler exit before disassociating from the block - * cgroup controller. - */ - if (q->elevator) { - mutex_lock(&q->sysfs_lock); - elevator_exit(q); - mutex_unlock(&q->sysfs_lock); - } - rq_qos_exit(q); - __blk_mq_unfreeze_queue(q, true); -} - /** * disk_release - releases all allocated resources of the gendisk * @dev: the device representing this disk @@ -1166,9 +1152,6 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); - if (queue_is_mq(disk->queue)) - disk_release_mq(disk->queue); - blkcg_exit_queue(disk->queue); disk_release_events(disk); -- cgit v1.2.3 From 5cf9c91ba927119fc6606b938b1895bb2459d3bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:25 +0200 Subject: block: serialize all debugfs operations using q->debugfs_mutex Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demans, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it. To make the teardown code a little simpler declare all debugfs dentry pointers and not just the main one uncoditionally in blkdev.h. Move debugfs_mutex next to the dentries that it protects and document what it is used for. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 25 ++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ----- block/blk-mq-sched.c | 11 +++++++++++ block/blk-rq-qos.c | 2 ++ block/blk-rq-qos.h | 7 ++++++- block/blk-sysfs.c | 20 +++++++++----------- include/linux/blkdev.h | 8 ++++---- kernel/trace/blktrace.c | 3 --- 8 files changed, 52 insertions(+), 29 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7e4136a60e1c..f0fcfe1387cb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -711,11 +711,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } } -void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -746,6 +741,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -773,6 +770,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -790,6 +789,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } @@ -811,6 +812,10 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id) void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -820,6 +825,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); + lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return; @@ -835,6 +842,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->rqos_debugfs_dir); q->rqos_debugfs_dir = NULL; } @@ -844,6 +853,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type; + lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent debugfs directory has not been created yet, return; * We will be called again later on with appropriate parent debugfs @@ -863,6 +874,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 69918f4170d6..771d45832878 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -42,10 +41,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index eb3c65a21362..a4f7c101b53b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -594,7 +594,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -607,7 +609,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } return 0; @@ -648,14 +652,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0; queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc7591..249a6f05dd3b 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_queue_rqos(q); + mutex_unlock(&q->debugfs_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 68267007da1c..0e46052b018a 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); - if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } } static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) @@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) blk_mq_unfreeze_queue(q); + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 88bd41d4cb59..6e4801b217a7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -779,14 +779,13 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); - if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split); if (blk_queue_has_srcu(q)) @@ -836,17 +835,16 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } + if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - } - - mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); ret = disk_register_independent_access_ranges(disk, NULL); if (ret) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb6e3c31b3b7..73c886eba8e1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -482,7 +482,6 @@ struct request_queue { #endif /* CONFIG_BLK_DEV_ZONED */ int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -526,11 +525,12 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex; bool mq_sysfs_init_done; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 10a32b0f2deb..fe04c6f96ca5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -770,14 +770,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } - - mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP -- cgit v1.2.3 From 99d055b4fd4bbb309c6cdb51a0d420669f777944 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:26 +0200 Subject: block: remove per-disk debugfs files in blk_unregister_queue The block debugfs files are created in blk_register_queue, which is called by add_disk and use a naming scheme based on the disk_name. After del_gendisk returns that name can be reused and thus we must not leave these debugfs files around, otherwise the kernel is unhappy and spews messages like: Directory XXXXX with parent 'block' already present! and the newly created devices will not have working debugfs files. Move the unregistration to blk_unregister_queue instead (which matches the sysfs unregistration) to make sure the debugfs life time rules match those of the disk name. As part of the move also make sure the whole debugfs unregistration is inside a single debugfs_mutex critical section. Note that this breaks blktests block/002, which checks that the debugfs directory has not been removed while blktests is running, but that particular check should simply be removed from the test case. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 8 -------- block/blk-mq-debugfs.h | 5 ----- block/blk-rq-qos.c | 4 ---- block/blk-sysfs.c | 16 ++++++++-------- 4 files changed, 8 insertions(+), 25 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f0fcfe1387cb..4d1ce9ef4318 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -840,14 +840,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - lockdep_assert_held(&q->debugfs_mutex); - - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 771d45832878..9c7d4b6117d4 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -35,7 +35,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -82,10 +81,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif #ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 249a6f05dd3b..d3a75693adbf 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,10 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, void rq_qos_exit(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_unregister_queue_rqos(q); - mutex_unlock(&q->debugfs_mutex); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6e4801b217a7..9b905e9443e4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -779,13 +779,6 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q); - mutex_lock(&q->debugfs_mutex); - blk_trace_shutdown(q); - debugfs_remove_recursive(q->debugfs_dir); - q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - mutex_unlock(&q->debugfs_mutex); - bioset_exit(&q->bio_split); if (blk_queue_has_srcu(q)) @@ -946,8 +939,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock); + mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; + q->rqos_debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); } -- cgit v1.2.3 From a09b314005f3a0956ebf56e01b3b80339df577cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:27 +0200 Subject: block: freeze the queue earlier in del_gendisk Freeze the queue earlier in del_gendisk so that the state does not change while we remove debugfs and sysfs files. Ming mentioned that being able to observer request in debugfs might be useful while the queue is being frozen in del_gendisk, which is made possible by this change. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-5-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index e0675772178b..278227ba1d53 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -623,6 +623,7 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); + blk_mq_freeze_queue_wait(q); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -646,8 +647,6 @@ void del_gendisk(struct gendisk *disk) pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); - blk_mq_freeze_queue_wait(q); - blk_throtl_cancel_bios(disk->queue); blk_sync_queue(q); -- cgit v1.2.3 From cc72b72073ac982a954d3b43519ca1c28f03c27c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 28 May 2022 00:55:39 +0900 Subject: tracing/kprobes: Check whether get_kretprobe() returns NULL in kretprobe_dispatcher() There is a small chance that get_kretprobe(ri) returns NULL in kretprobe_dispatcher() when another CPU unregisters the kretprobe right after __kretprobe_trampoline_handler(). To avoid this issue, kretprobe_dispatcher() checks the get_kretprobe() return value again. And if it is NULL, it returns soon because that kretprobe is under unregistering process. This issue has been introduced when the kretprobe is decoupled from the struct kretprobe_instance by commit d741bf41d7c7 ("kprobes: Remove kretprobe hash"). Before that commit, the struct kretprob_instance::rp directly points the kretprobe and it is never be NULL. Link: https://lkml.kernel.org/r/165366693881.797669.16926184644089588731.stgit@devnote2 Reported-by: Yonghong Song Fixes: d741bf41d7c7 ("kprobes: Remove kretprobe hash") Cc: Peter Zijlstra Cc: Ingo Molnar Cc: bpf Cc: Kernel Team Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_kprobe.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 93507330462c..a245ea673715 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1718,8 +1718,17 @@ static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); - struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return 0; + tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) -- cgit v1.2.3 From b9b6d4c925604b70d007feb4c77b8cc4c038d2da Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 15 Jun 2022 23:05:34 +0200 Subject: ARM: dts: bcm2711-rpi-400: Fix GPIO line names The GPIO expander line names has been fixed in the vendor tree last year, so upstream these changes. Fixes: 1c701accecf2 ("ARM: dts: Add Raspberry Pi 400 support") Reported-by: Ivan T. Ivanov Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2711-rpi-400.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711-rpi-400.dts b/arch/arm/boot/dts/bcm2711-rpi-400.dts index f4d2fc20397c..c53d9eb0b802 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-400.dts @@ -28,12 +28,12 @@ &expgpio { gpio-line-names = "BT_ON", "WL_ON", - "", + "PWR_LED_OFF", "GLOBAL_RESET", "VDD_SD_IO_SEL", - "CAM_GPIO", + "GLOBAL_SHUTDOWN", "SD_PWR_ON", - "SD_OC_N"; + "SHUTDOWN_REQUEST"; }; &genet_mdio { -- cgit v1.2.3 From f4b0d318097e45cbac5e14976f8bb56aa2cef504 Mon Sep 17 00:00:00 2001 From: sunliming Date: Thu, 2 Jun 2022 22:06:13 +0800 Subject: tracing: Simplify conditional compilation code in tracing_set_tracer() Two conditional compilation directives "#ifdef CONFIG_TRACER_MAX_TRACE" are used consecutively, and no other code in between. Simplify conditional the compilation code and only use one "#ifdef CONFIG_TRACER_MAX_TRACE". Link: https://lkml.kernel.org/r/20220602140613.545069-1-sunliming@kylinos.cn Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c95992e2c71..a8cfac0611bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6424,9 +6424,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) synchronize_rcu(); free_snapshot(tr); } -#endif -#ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) -- cgit v1.2.3 From 93a8c044b9a3711d594702daea9fbe2292c73a42 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Sun, 5 Jun 2022 17:27:29 +0800 Subject: tracefs: Fix syntax errors in comments Delete the redundant word 'to'. Link: https://lkml.kernel.org/r/20220605092729.13010-1-wangxiang@cdjrlc.com Signed-off-by: Xiang wangx Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index de7252715b12..81d26abf486f 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -553,7 +553,7 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) * * Only one instances directory is allowed. * - * The instances directory is special as it allows for mkdir and rmdir to + * The instances directory is special as it allows for mkdir and rmdir * to be done by userspace. When a mkdir or rmdir is performed, the inode * locks are released and the methods passed in (@mkdir and @rmdir) are * called without locks and with the name of the directory being created -- cgit v1.2.3 From 12c3e0c92fd7cb3d3b698d84fdde7dccb6ba8822 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Sun, 12 Jun 2022 07:42:32 -0700 Subject: tracing/uprobes: Remove unwanted initialization in __trace_uprobe_create() Remove the unwanted initialization of variable 'ret'. This fixes the clang scan warning: Value stored to 'ret' is never read [deadcode.DeadStores] Link: https://lkml.kernel.org/r/20220612144232.145209-1-gautammenghani201@gmail.com Signed-off-by: Gautam Menghani Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_uprobe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273cd..c3dc4f859a6b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -546,7 +546,6 @@ static int __trace_uprobe_create(int argc, const char **argv) bool is_return = false; int i, ret; - ret = 0; ref_ctr_offset = 0; switch (argv[0][0]) { -- cgit v1.2.3 From 6cf06c17e94f26c290fd3370a5c36514ae15ac43 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 16 Jun 2022 18:41:49 +1000 Subject: powerpc/mm: Move CMA reservations after initmem_init() After commit 11ac3e87ce09 ("mm: cma: use pageblock_order as the single alignment") there is an error at boot about the KVM CMA reservation failing, eg: kvm_cma_reserve: reserving 6553 MiB for global area cma: Failed to reserve 6553 MiB That makes it impossible to start KVM guests using the hash MMU with more than 2G of memory, because the VM is unable to allocate a large enough region for the hash page table, eg: $ qemu-system-ppc64 -enable-kvm -M pseries -m 4G ... qemu-system-ppc64: Failed to allocate KVM HPT of order 25: Cannot allocate memory Aneesh pointed out that this happens because when kvm_cma_reserve() is called, pageblock_order has not been initialised yet, and is still zero, causing the checks in cma_init_reserved_mem() against CMA_MIN_ALIGNMENT_PAGES to fail. Fix it by moving the call to kvm_cma_reserve() after initmem_init(). The pageblock_order is initialised in sparse_init() which is called from initmem_init(). Also move the hugetlb CMA reservation. Fixes: 11ac3e87ce09 ("mm: cma: use pageblock_order as the single alignment") Reviewed-by: Aneesh Kumar K.V Reviewed-by: Zi Yan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220616120033.1976732-1-mpe@ellerman.id.au --- arch/powerpc/kernel/setup-common.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index eb0077b302e2..1a02629ec70b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -935,12 +935,6 @@ void __init setup_arch(char **cmdline_p) /* Print various info about the machine that has been gathered so far. */ print_system_info(); - /* Reserve large chunks of memory for use by CMA for KVM. */ - kvm_cma_reserve(); - - /* Reserve large chunks of memory for us by CMA for hugetlb */ - gigantic_hugetlb_cma_reserve(); - klp_init_thread_info(&init_task); setup_initial_init_mm(_stext, _etext, _edata, _end); @@ -955,6 +949,13 @@ void __init setup_arch(char **cmdline_p) initmem_init(); + /* + * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must + * be called after initmem_init(), so that pageblock_order is initialised. + */ + kvm_cma_reserve(); + gigantic_hugetlb_cma_reserve(); + early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); if (ppc_md.setup_arch) -- cgit v1.2.3 From 20a9689b3607456d92c6fb764501f6a95950b098 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 11 Jun 2022 17:10:13 +0200 Subject: powerpc/microwatt: wire up rng during setup_arch() The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Fortunately, each platform already has a setup_arch function pointer, which means it's easy to wire this up. This commit also removes some noisy log messages that don't add much. Fixes: c25769fddaec ("powerpc/microwatt: Add support for hardware random number generator") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220611151015.548325-2-Jason@zx2c4.com --- arch/powerpc/platforms/microwatt/microwatt.h | 7 +++++++ arch/powerpc/platforms/microwatt/rng.c | 10 +++------- arch/powerpc/platforms/microwatt/setup.c | 8 ++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 arch/powerpc/platforms/microwatt/microwatt.h diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h new file mode 100644 index 000000000000..335417e95e66 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/microwatt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MICROWATT_H +#define _MICROWATT_H + +void microwatt_rng_init(void); + +#endif /* _MICROWATT_H */ diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c index 7bc4d1cbfaf0..8ece87d005c8 100644 --- a/arch/powerpc/platforms/microwatt/rng.c +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -11,6 +11,7 @@ #include #include #include +#include "microwatt.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -29,7 +30,7 @@ static int microwatt_get_random_darn(unsigned long *v) return 1; } -static __init int rng_init(void) +void __init microwatt_rng_init(void) { unsigned long val; int i; @@ -37,12 +38,7 @@ static __init int rng_init(void) for (i = 0; i < 10; i++) { if (microwatt_get_random_darn(&val)) { ppc_md.get_random_seed = microwatt_get_random_darn; - return 0; + return; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - - return -EIO; } -machine_subsys_initcall(, rng_init); diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c index 0b02603bdb74..6b32539395a4 100644 --- a/arch/powerpc/platforms/microwatt/setup.c +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -16,6 +16,8 @@ #include #include +#include "microwatt.h" + static void __init microwatt_init_IRQ(void) { xics_init(); @@ -32,10 +34,16 @@ static int __init microwatt_populate(void) } machine_arch_initcall(microwatt, microwatt_populate); +static void __init microwatt_setup_arch(void) +{ + microwatt_rng_init(); +} + define_machine(microwatt) { .name = "microwatt", .probe = microwatt_probe, .init_IRQ = microwatt_init_IRQ, + .setup_arch = microwatt_setup_arch, .progress = udbg_progress, .calibrate_decr = generic_calibrate_decr, }; -- cgit v1.2.3 From e561e472a3d441753bd012333b057f48fef1045b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 11 Jun 2022 17:10:15 +0200 Subject: powerpc/pseries: wire up rng during setup_arch() The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Fortunately, each platform already has a setup_arch function pointer, which means it's easy to wire this up. This commit also removes some noisy log messages that don't add much. Fixes: a489043f4626 ("powerpc/pseries: Implement arch_get_random_long() based on H_RANDOM") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220611151015.548325-4-Jason@zx2c4.com --- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/rng.c | 11 +++-------- arch/powerpc/platforms/pseries/setup.c | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index f5c916c839c9..1d75b7742ef0 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -122,4 +122,6 @@ void pseries_lpar_read_hblkrm_characteristics(void); static inline void pseries_lpar_read_hblkrm_characteristics(void) { } #endif +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 6268545947b8..6ddfdeaace9e 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include #include #include +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - of_node_put(dn); - return 0; } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index afb074269b42..ee4f1db49515 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -839,6 +839,7 @@ static void __init pSeries_setup_arch(void) } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; + pseries_rng_init(); } static void pseries_panic(char *str) -- cgit v1.2.3 From ec6d0dde71d760aa60316f8d1c9a1b0d99213529 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 9 Jun 2022 16:03:28 +0530 Subject: powerpc: Enable execve syscall exit tracepoint On execve[at], we are zero'ing out most of the thread register state including gpr[0], which contains the syscall number. Due to this, we fail to trigger the syscall exit tracepoint properly. Fix this by retaining gpr[0] in the thread register state. Before this patch: # tail /sys/kernel/debug/tracing/trace cat-123 [000] ..... 61.449351: sys_execve(filename: 7fffa6b23448, argv: 7fffa6b233e0, envp: 7fffa6b233f8) cat-124 [000] ..... 62.428481: sys_execve(filename: 7fffa6b23448, argv: 7fffa6b233e0, envp: 7fffa6b233f8) echo-125 [000] ..... 65.813702: sys_execve(filename: 7fffa6b23378, argv: 7fffa6b233a0, envp: 7fffa6b233b0) echo-125 [000] ..... 65.822214: sys_execveat(fd: 0, filename: 1009ac48, argv: 7ffff65d0c98, envp: 7ffff65d0ca8, flags: 0) After this patch: # tail /sys/kernel/debug/tracing/trace cat-127 [000] ..... 100.416262: sys_execve(filename: 7fffa41b3448, argv: 7fffa41b33e0, envp: 7fffa41b33f8) cat-127 [000] ..... 100.418203: sys_execve -> 0x0 echo-128 [000] ..... 103.873968: sys_execve(filename: 7fffa41b3378, argv: 7fffa41b33a0, envp: 7fffa41b33b0) echo-128 [000] ..... 103.875102: sys_execve -> 0x0 echo-128 [000] ..... 103.882097: sys_execveat(fd: 0, filename: 1009ac48, argv: 7fffd10d2148, envp: 7fffd10d2158, flags: 0) echo-128 [000] ..... 103.883225: sys_execveat -> 0x0 Cc: stable@vger.kernel.org Signed-off-by: Naveen N. Rao Tested-by: Sumit Dubey2 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220609103328.41306-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ee0433809621..0fbda89cd1bb 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1855,7 +1855,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; -- cgit v1.2.3 From 7bc08056a6dabc3a1442216daf527edf61ac24b6 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 14 Jun 2022 23:49:52 +1000 Subject: powerpc/rtas: Allow ibm,platform-dump RTAS call with null buffer address Add a special case to block_rtas_call() to allow the ibm,platform-dump RTAS call through the RTAS filter if the buffer address is 0. According to PAPR, ibm,platform-dump is called with a null buffer address to notify the platform firmware that processing of a particular dump is finished. Without this, on a pseries machine with CONFIG_PPC_RTAS_FILTER enabled, an application such as rtas_errd that is attempting to retrieve a dump will encounter an error at the end of the retrieval process. Fixes: bd59380c5ba4 ("powerpc/rtas: Restrict RTAS requests from userspace") Cc: stable@vger.kernel.org Reported-by: Sathvika Vasireddy Signed-off-by: Andrew Donnellan Reviewed-by: Tyrel Datwyler Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220614134952.156010-1-ajd@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a6fce3106e02..693133972294 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1071,7 +1071,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "get-time-of-day", -1, -1, -1, -1, -1 }, { "ibm,get-vpd", -1, 0, -1, 1, 2 }, { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, @@ -1120,6 +1120,15 @@ static bool block_rtas_call(int token, int nargs, size = 1; end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + if (!in_rmo_buf(base, end)) goto err; } -- cgit v1.2.3 From 856216b70a41ff3f8c866b627546afa01567b389 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 17 Jun 2022 08:13:04 -0700 Subject: arm64: dts: ti: k3-j721s2: Fix overlapping GICD memory region GICD region was overlapping with GICR causing the latter to not map successfully, and in turn the gic-v3 driver would fail to initialize. This issue was hidden till commit 2b2cd74a06c3 ("irqchip/gic-v3: Claim iomem resources") replaced of_iomap() calls with of_io_request_and_map() that internally called request_mem_region(). Respective console output before this patchset: [ 0.000000] GICv3: /bus@100000/interrupt-controller@1800000: couldn't map region 0 Fixes: b8545f9d3a54 ("arm64: dts: ti: Add initial support for J721S2 SoC") Cc: linux-stable@vger.kernel.org Cc: Marc Zyngier Cc: Robin Murphy Cc: Nishanth Menon Signed-off-by: Matt Ranostay Acked-by: Marc Zyngier Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20220617151304.446607-1-mranostay@ti.com --- arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi index be7f39299894..19966f72c5b3 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi @@ -33,7 +33,7 @@ ranges; #interrupt-cells = <3>; interrupt-controller; - reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ + reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */ <0x00 0x01900000 0x00 0x100000>, /* GICR */ <0x00 0x6f000000 0x00 0x2000>, /* GICC */ <0x00 0x6f010000 0x00 0x1000>, /* GICH */ -- cgit v1.2.3 From 0c0af88f3f318e73237f7fadd02d0bf2b6c996bb Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Thu, 12 May 2022 12:18:58 +0530 Subject: arm64: dts: ti: k3-am64-main: Remove support for HS400 speed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AM64 SoC, does not support HS400 and HS200 is the maximum supported speed mode[1]. Therefore, fix the device tree node to reflect the same. [1] - https://www.ti.com/lit/ds/symlink/am6442.pdf (SPRSP56C – JANUARY 2021 – REVISED FEBRUARY 2022) Fixes: 8abae9389bdb ("arm64: dts: ti: Add support for AM642 SoC") Signed-off-by: Aswath Govindraju Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20220512064859.32059-1-a-govindraju@ti.com --- arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index f64b368c6c37..cdb530597c5e 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -456,13 +456,11 @@ clock-names = "clk_ahb", "clk_xin"; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; ti,trm-icp = <0x2>; ti,otap-del-sel-legacy = <0x0>; ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x6>; ti,otap-del-sel-hs200 = <0x7>; - ti,otap-del-sel-hs400 = <0x4>; }; sdhci1: mmc@fa00000 { -- cgit v1.2.3 From 3c7a52217a8c1e674a9e15b71a7239d71a4d9cfd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 18 Jun 2022 09:11:18 -0700 Subject: drm/msm: Drop update_fences() I noticed while looking at some traces, that we could miss calls to msm_update_fence(), as the irq could have raced with retire_submits() which could have already popped the last submit on a ring out of the queue of in-flight submits. But walking the list of submits in the irq handler isn't really needed, as dma_fence_is_signaled() will dtrt. So lets just drop it entirely. v2: use spin_lock_irqsave/restore as we are no longer protected by the spin_lock_irqsave/restore() in update_fences() Reported-by: Steev Klimaszewski Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark Tested-by: Steev Klimaszewski Patchwork: https://patchwork.freedesktop.org/patch/490136/ Link: https://lore.kernel.org/r/20220618161120.3451993-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_fence.c | 8 +++++--- drivers/gpu/drm/msm/msm_gpu.c | 22 ++-------------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 3df255402a33..38e3323bc232 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -46,12 +46,14 @@ bool msm_fence_completed(struct msm_fence_context *fctx, uint32_t fence) (int32_t)(*fctx->fenceptr - fence) >= 0; } -/* called from workqueue */ +/* called from irq handler and workqueue (in recover path) */ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) { - spin_lock(&fctx->spinlock); + unsigned long flags; + + spin_lock_irqsave(&fctx->spinlock, flags); fctx->completed_fence = max(fence, fctx->completed_fence); - spin_unlock(&fctx->spinlock); + spin_unlock_irqrestore(&fctx->spinlock, flags); } struct msm_fence { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 244511f85044..cedc88cf8083 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -164,24 +164,6 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) return ret; } -static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, - uint32_t fence) -{ - struct msm_gem_submit *submit; - unsigned long flags; - - spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) { - if (fence_after(submit->seqno, fence)) - break; - - msm_update_fence(submit->ring->fctx, - submit->hw_fence->seqno); - dma_fence_signal(submit->hw_fence); - } - spin_unlock_irqrestore(&ring->submit_lock, flags); -} - #ifdef CONFIG_DEV_COREDUMP static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) @@ -438,7 +420,7 @@ static void recover_worker(struct kthread_work *work) if (ring == cur_ring) fence++; - update_fences(gpu, ring, fence); + msm_update_fence(ring->fctx, fence); } if (msm_gpu_active(gpu)) { @@ -736,7 +718,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) int i; for (i = 0; i < gpu->nr_rings; i++) - update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); + msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence); kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu); -- cgit v1.2.3 From c8af219d18502c52319df8d4e3dcfe29a3ca31ab Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 18 Jun 2022 09:11:19 -0700 Subject: drm/msm: Don't overwrite hw fence in hw_init Prior to the last commit, this could result in setting the GPU written fence value back to an older value, if we had missed updating completed_fence prior to suspend. This was mostly harmless as the GPU would eventually overwrite it again with the correct value. But we should just not do this. Instead just leave a sanity check that the fence looks plausible (in case the GPU scribbled on memory). Reported-by: Steev Klimaszewski Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark Tested-by: Steev Klimaszewski Patchwork: https://patchwork.freedesktop.org/patch/490138/ Link: https://lore.kernel.org/r/20220618161120.3451993-2-robdclark@gmail.com --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 ++++++++--- drivers/gpu/drm/msm/msm_gpu.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f944b69e2a25..efe9840e28fa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -498,10 +498,15 @@ int adreno_hw_init(struct msm_gpu *gpu) ring->cur = ring->start; ring->next = ring->start; - - /* reset completed fence seqno: */ - ring->memptrs->fence = ring->fctx->completed_fence; ring->memptrs->rptr = 0; + + /* Detect and clean up an impossible fence, ie. if GPU managed + * to scribble something invalid, we don't want that to confuse + * us into mistakingly believing that submits have completed. + */ + if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) { + ring->memptrs->fence = ring->fctx->last_fence; + } } return 0; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index cedc88cf8083..c8cd9bfa3eeb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -418,7 +418,7 @@ static void recover_worker(struct kthread_work *work) * one more to clear the faulting submit */ if (ring == cur_ring) - fence++; + ring->memptrs->fence = ++fence; msm_update_fence(ring->fctx, fence); } -- cgit v1.2.3 From b9cc4598607cb7f7eae5c75fc1e3209cd52ff5e0 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 7 Jun 2022 15:08:38 +0400 Subject: drm/msm/mdp4: Fix refcount leak in mdp4_modeset_init_intf of_graph_get_remote_node() returns remote device node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 86418f90a4c1 ("drm: convert drivers to use of_graph_get_remote_node") Signed-off-by: Miaoqian Lin Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/488473/ Link: https://lore.kernel.org/r/20220607110841.53889-1-linmq006@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index fb48c8c19ec3..17cb1fc78379 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -216,6 +216,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, encoder = mdp4_lcdc_encoder_init(dev, panel_node); if (IS_ERR(encoder)) { DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n"); + of_node_put(panel_node); return PTR_ERR(encoder); } @@ -225,6 +226,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, connector = mdp4_lvds_connector_init(dev, panel_node, encoder); if (IS_ERR(connector)) { DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n"); + of_node_put(panel_node); return PTR_ERR(connector); } -- cgit v1.2.3 From d80c3ba0ac247791a4ed7a0cd865a64906c8906a Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Mon, 6 Jun 2022 10:55:39 -0700 Subject: drm/msm/dp: check core_initialized before disable interrupts at dp_display_unbind() During msm initialize phase, dp_display_unbind() will be called to undo initializations had been done by dp_display_bind() previously if there is error happen at msm_drm_bind. In this case, core_initialized flag had to be check to make sure clocks is on before update DP controller register to disable HPD interrupts. Otherwise system will crash due to below NOC fatal error. QTISECLIB [01f01a7ad]CNOC2 ERROR: ERRLOG0_LOW = 0x00061007 QTISECLIB [01f01a7ad]GEM_NOC ERROR: ERRLOG0_LOW = 0x00001007 QTISECLIB [01f0371a0]CNOC2 ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f055297]GEM_NOC ERROR: ERRLOG0_HIGH = 0x00000003 QTISECLIB [01f072beb]CNOC2 ERROR: ERRLOG1_LOW = 0x00000024 QTISECLIB [01f0914b8]GEM_NOC ERROR: ERRLOG1_LOW = 0x00000042 QTISECLIB [01f0ae639]CNOC2 ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0cc73f]GEM_NOC ERROR: ERRLOG1_HIGH = 0x00004002 QTISECLIB [01f0ea092]CNOC2 ERROR: ERRLOG2_LOW = 0x0009020c QTISECLIB [01f10895f]GEM_NOC ERROR: ERRLOG2_LOW = 0x0ae9020c QTISECLIB [01f125ae1]CNOC2 ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f143be7]GEM_NOC ERROR: ERRLOG2_HIGH = 0x00000000 QTISECLIB [01f16153a]CNOC2 ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f17fe07]GEM_NOC ERROR: ERRLOG3_LOW = 0x00000000 QTISECLIB [01f19cf89]CNOC2 ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1bb08e]GEM_NOC ERROR: ERRLOG3_HIGH = 0x00000000 QTISECLIB [01f1d8a31]CNOC2 ERROR: SBM1 FAULTINSTATUS0_LOW = 0x00000002 QTISECLIB [01f1f72a4]GEM_NOC ERROR: SBM0 FAULTINSTATUS0_LOW = 0x00000001 QTISECLIB [01f21a217]CNOC3 ERROR: ERRLOG0_LOW = 0x00000006 QTISECLIB [01f23dfd3]NOC error fatal changes in v2: -- drop the first patch (drm/msm: enable msm irq after all initializations are done successfully at msm_drm_init()) since the problem had been fixed by other patch Fixes: 570d3e5d28db ("drm/msm/dp: stop event kernel thread when DP unbind") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/488387/ Link: https://lore.kernel.org/r/1654538139-7450-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index da5c03a8cc4c..2b7263976740 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -309,7 +309,8 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct msm_drm_private *priv = dev_get_drvdata(master); /* disable all HPD interrupts */ - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + if (dp->core_initialized) + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); kthread_stop(dp->ev_tsk); -- cgit v1.2.3 From 2211e34a9d57973993b644c4a2c76086cb6ce7fd Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 16 Jun 2022 12:26:46 -0700 Subject: drm/msm/dpu: limit wb modes based on max_mixer_width As explained in [1], using max_linewidth to limit the modes does not seem to remove 4K modes on chipsets such as sm8250 where the max_linewidth actually supports 4k. This would have been alright if dual SSPP support was present but otherwise fails the per SSPP bandwidth check. The ideal way to implement this would be to filter out the modes which will exceed the bandwidth check by computing it. But this would be an exhaustive solution till we have dual SSPP support. Let's instead use max_mixer_width to limit the modes. max_mixer_width still remains 2560 on sm8250 so even if the max_linewidth is 4096, the only way 4k modes could have been supported is to have source split enabled on the SSPP. Since source split support is not enabled yet in DPU driver, enforce max_mixer_width as the upper limit on the modes. [1] https://patchwork.freedesktop.org/patch/489662/ Fixes: e67dcecda06f ("drm/msm/dpu: limit writeback modes according to max_linewidth") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/489893/ Link: https://lore.kernel.org/r/1655407606-21760-1-git-send-email-quic_abhinavk@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 399115e4e217..2fd787079f9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -11,7 +11,14 @@ static int dpu_wb_conn_get_modes(struct drm_connector *connector) struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); - return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_linewidth, + /* + * We should ideally be limiting the modes only to the maxlinewidth but + * on some chipsets this will allow even 4k modes to be added which will + * fail the per SSPP bandwidth checks. So, till we have dual-SSPP support + * and source split support added lets limit the modes based on max_mixer_width + * as 4K modes can then be supported. + */ + return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_mixer_width, dev->mode_config.max_height); } -- cgit v1.2.3 From a6e2af64a79afa7f1b29375b5231e840a84bb845 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Thu, 16 Jun 2022 13:26:40 -0700 Subject: drm/msm/dp: force link training for display resolution change Display resolution change is implemented through drm modeset. Older modeset (resolution) has to be disabled first before newer modeset (resolution) can be enabled. Display disable will turn off both pixel clock and main link clock so that main link have to be re-trained during display enable to have new video stream flow again. At current implementation, display enable function manually kicks up irq_hpd_handle which will read panel link status and start link training if link status is not in sync state. However, there is rare case that a particular panel links status keep staying in sync for some period of time after main link had been shut down previously at display disabled. In this case, main link retraining will not be executed by irq_hdp_handle(). Hence video stream of newer display resolution will fail to be transmitted to panel due to main link is not in sync between host and panel. This patch will bypass irq_hpd_handle() in favor of directly call dp_ctrl_on_stream() to always perform link training in regardless of main link status. So that no unexpected exception resolution change failure cases will happen. Also this implementation are more efficient than manual kicking off irq_hpd_handle function. Changes in v2: -- set force_link_train flag on DP only (is_edp == false) Changes in v3: -- revise commit text -- add Fixes tag Changes in v4: -- revise commit text Changes in v5: -- fix spelling at commit text Changes in v6: -- split dp_ctrl_on_stream() for phy test case -- revise commit text for modeset Changes in v7: -- drop 0 assignment at local variable (ret = 0) Changes in v8: -- add patch to remove pixel_rate from dp_ctrl Changes in v9: -- forward declare dp_ctrl_on_stream_phy_test_report() Fixes: 62671d2ef24b ("drm/msm/dp: fixes wrong connection state caused by failure of link train") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/489895/ Link: https://lore.kernel.org/r/1655411200-7255-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 33 +++++++++++++++++++++++++-------- drivers/gpu/drm/msm/dp/dp_ctrl.h | 2 +- drivers/gpu/drm/msm/dp/dp_display.c | 13 ++++++------- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index f3e333eff32d..f18588aecea2 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1533,6 +1533,8 @@ end: return ret; } +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl); + static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) { int ret = 0; @@ -1556,7 +1558,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) ret = dp_ctrl_on_link(&ctrl->dp_ctrl); if (!ret) - ret = dp_ctrl_on_stream(&ctrl->dp_ctrl); + ret = dp_ctrl_on_stream_phy_test_report(&ctrl->dp_ctrl); else DRM_ERROR("failed to enable DP link controller\n"); @@ -1812,7 +1814,27 @@ static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl) return dp_ctrl_setup_main_link(ctrl, &training_step); } -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) +static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl) +{ + int ret; + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; + + ret = dp_ctrl_enable_stream_clocks(ctrl); + if (ret) { + DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret); + return ret; + } + + dp_ctrl_send_phy_test_pattern(ctrl); + + return 0; +} + +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train) { int ret = 0; bool mainlink_ready = false; @@ -1848,12 +1870,7 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) goto end; } - if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) { - dp_ctrl_send_phy_test_pattern(ctrl); - return 0; - } - - if (!dp_ctrl_channel_eq_ok(ctrl)) + if (force_link_train || !dp_ctrl_channel_eq_ok(ctrl)) dp_ctrl_link_retrain(ctrl); /* stop txing train pattern to end link training */ diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 0745fde01b45..b563e2e3bfe5 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -21,7 +21,7 @@ struct dp_ctrl { }; int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); -int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); +int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off_link(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 2b7263976740..a6117926a274 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -873,7 +873,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) return 0; } - rc = dp_ctrl_on_stream(dp->ctrl); + rc = dp_ctrl_on_stream(dp->ctrl, data); if (!rc) dp_display->power_on = true; @@ -1660,6 +1660,7 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) int rc = 0; struct dp_display_private *dp_display; u32 state; + bool force_link_train = false; dp_display = container_of(dp, struct dp_display_private, dp_display); if (!dp_display->dp_mode.drm_mode.clock) { @@ -1694,10 +1695,12 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) state = dp_display->hpd_state; - if (state == ST_DISPLAY_OFF) + if (state == ST_DISPLAY_OFF) { dp_display_host_phy_init(dp_display); + force_link_train = true; + } - dp_display_enable(dp_display, 0); + dp_display_enable(dp_display, force_link_train); rc = dp_display_post_enable(dp); if (rc) { @@ -1706,10 +1709,6 @@ void dp_bridge_enable(struct drm_bridge *drm_bridge) dp_display_unprepare(dp); } - /* manual kick off plug event to train link */ - if (state == ST_DISPLAY_OFF) - dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0); - /* completed connection */ dp_display->hpd_state = ST_CONNECTED; -- cgit v1.2.3 From a0117dc956429f2ede17b323046e1968d1849150 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 20:44:32 +0800 Subject: xtensa: Fix refcount leak bug in time.c In calibrate_ccount(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Cc: stable@vger.kernel.org Signed-off-by: Liang He Message-Id: <20220617124432.4049006-1-windhl@126.com> Signed-off-by: Max Filippov --- arch/xtensa/kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index e8ceb1528608..16b8a6273772 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -154,6 +154,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; -- cgit v1.2.3 From 173940b3ae40114d4179c251a98ee039dc9cd5b3 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 19:53:23 +0800 Subject: xtensa: xtfpga: Fix refcount leak bug in setup In machine_setup(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Cc: stable@vger.kernel.org Signed-off-by: Liang He Message-Id: <20220617115323.4046905-1-windhl@126.com> Signed-off-by: Max Filippov --- arch/xtensa/platforms/xtfpga/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 538e6748e85a..c79c1d09ea86 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -133,6 +133,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); -- cgit v1.2.3 From ca5dabcff1df6bc8c413922b5fa63cc602858803 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 10 Jun 2022 17:43:43 +0200 Subject: powerpc/prom_init: Fix build failure with GCC_PLUGIN_STRUCTLEAK_BYREF_ALL and KASAN When CONFIG_KASAN is selected, we expect prom_init to use __memset() because it is too early to use memset(). But with CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL, the compiler adds calls to memset() to clear objects on stack, hence the following failure: PROMCHK arch/powerpc/kernel/prom_init_check Error: External symbol 'memset' referenced from prom_init.c make[2]: *** [arch/powerpc/kernel/Makefile:204 : arch/powerpc/kernel/prom_init_check] Erreur 1 prom_find_machine_type() is called from prom_init() and is called only once, so lets put compat[] in BSS instead of stack to avoid that. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3802811f7cf94f730be44688539c01bba3a3b5c0.1654875808.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 04694ec423f6..13d6cb188835 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2302,7 +2302,7 @@ static void __init prom_init_stdout(void) static int __init prom_find_machine_type(void) { - char compat[256]; + static char compat[256] __prombss; int len, i = 0; #ifdef CONFIG_PPC64 phandle rtas; -- cgit v1.2.3 From be33d52ef5b4bdfec04cfdad39368c343bac97a3 Mon Sep 17 00:00:00 2001 From: Maya Matuszczyk Date: Sun, 19 Jun 2022 13:19:52 +0200 Subject: drm: panel-orientation-quirks: Add quirk for Aya Neo Next The device is identified by "NEXT" in board name, however there are different versions of it, "Next Advance" and "Next Pro", that have different DMI board names. Due to a production error a batch or two have their board names prefixed by "AYANEO", this makes it 6 different DMI board names. To save some space in final kernel image DMI_MATCH is used instead of DMI_EXACT_MATCH. Signed-off-by: Maya Matuszczyk Reviewed-by: Javier Martinez Canillas Reviewed-by: Hans de Goede Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20220619111952.8487-1-maccraft123mc@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 4e853acfd1e8..df87ba99a87c 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -152,6 +152,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO NEXT */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"), + DMI_MATCH(DMI_BOARD_NAME, "NEXT"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), -- cgit v1.2.3 From 6f6bd7591945c679b7f595119ea997b19f5794db Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Wed, 11 May 2022 12:00:06 +0300 Subject: iio: freq: admv1014: Fix warning about dubious x & !y and improve readability The warning comes from __BF_FIELD_CHECK() specifically BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ The code was using !(enum value) which is not particularly easy to follow so replace that with explicit matching and use of ? 0 : 1; or ? 1 : 0; to improve readability. Signed-off-by: Antoniu Miclaus Link: https://lore.kernel.org/r/20220511090006.90502-1-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/admv1014.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/frequency/admv1014.c b/drivers/iio/frequency/admv1014.c index a7994f8e6b9b..1aac5665b5de 100644 --- a/drivers/iio/frequency/admv1014.c +++ b/drivers/iio/frequency/admv1014.c @@ -700,8 +700,10 @@ static int admv1014_init(struct admv1014_state *st) ADMV1014_DET_EN_MSK; enable_reg = FIELD_PREP(ADMV1014_P1DB_COMPENSATION_MSK, st->p1db_comp ? 3 : 0) | - FIELD_PREP(ADMV1014_IF_AMP_PD_MSK, !(st->input_mode)) | - FIELD_PREP(ADMV1014_BB_AMP_PD_MSK, st->input_mode) | + FIELD_PREP(ADMV1014_IF_AMP_PD_MSK, + (st->input_mode == ADMV1014_IF_MODE) ? 0 : 1) | + FIELD_PREP(ADMV1014_BB_AMP_PD_MSK, + (st->input_mode == ADMV1014_IF_MODE) ? 1 : 0) | FIELD_PREP(ADMV1014_DET_EN_MSK, st->det_en); return __admv1014_spi_update_bits(st, ADMV1014_REG_ENABLE, enable_reg_msk, enable_reg); -- cgit v1.2.3 From b2f5ad97645e1deb5ca9bcb7090084b92cae35d2 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 10 May 2022 17:24:31 +0800 Subject: iio: gyro: mpu3050: Fix the error handling in mpu3050_power_up() The driver should disable regulators when fails at regmap_update_bits(). Signed-off-by: Zheyu Ma Reviewed-by: Linus Walleij Cc: Link: https://lore.kernel.org/r/20220510092431.1711284-1-zheyuma97@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ea387efab62d..f4c2f4cb4834 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -874,6 +874,7 @@ static int mpu3050_power_up(struct mpu3050 *mpu3050) ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM, MPU3050_PWR_MGM_SLEEP, 0); if (ret) { + regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs); dev_err(mpu3050->dev, "error setting power mode\n"); return ret; } -- cgit v1.2.3 From 78601726d4a59a291acc5a52da1d3a0a6831e4e8 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 19 May 2022 11:19:25 +0200 Subject: iio: trigger: sysfs: fix use-after-free on remove Ensure that the irq_work has completed before the trigger is freed. ================================================================== BUG: KASAN: use-after-free in irq_work_run_list Read of size 8 at addr 0000000064702248 by task python3/25 Call Trace: irq_work_run_list irq_work_tick update_process_times tick_sched_handle tick_sched_timer __hrtimer_run_queues hrtimer_interrupt Allocated by task 25: kmem_cache_alloc_trace iio_sysfs_trig_add dev_attr_store sysfs_kf_write kernfs_fop_write_iter new_sync_write vfs_write ksys_write sys_write Freed by task 25: kfree iio_sysfs_trig_remove dev_attr_store sysfs_kf_write kernfs_fop_write_iter new_sync_write vfs_write ksys_write sys_write ================================================================== Fixes: f38bc926d022 ("staging:iio:sysfs-trigger: Use irq_work to properly active trigger") Signed-off-by: Vincent Whitchurch Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20220519091925.1053897-1-vincent.whitchurch@axis.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/iio-trig-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index 2a4b75897910..3d911c24b265 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -191,6 +191,7 @@ static int iio_sysfs_trigger_remove(int id) } iio_trigger_unregister(t->trig); + irq_work_sync(&t->work); iio_trigger_free(t->trig); list_del(&t->l); -- cgit v1.2.3 From d836715f588ea15f905f607c27bc693587058db4 Mon Sep 17 00:00:00 2001 From: Jialin Zhang Date: Tue, 17 May 2022 11:35:26 +0800 Subject: iio: adc: rzg2l_adc: add missing fwnode_handle_put() in rzg2l_adc_parse_properties() fwnode_handle_put() should be used when terminating device_for_each_child_node() iteration with break or return to prevent stale device node references from being left behind. Fixes: d484c21bacfa ("iio: adc: Add driver for Renesas RZ/G2L A/D converter") Reported-by: Hulk Robot Signed-off-by: Jialin Zhang Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220517033526.2035735-1-zhangjialin11@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rzg2l_adc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 7585144b9715..5b09a93fdf34 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -334,11 +334,15 @@ static int rzg2l_adc_parse_properties(struct platform_device *pdev, struct rzg2l i = 0; device_for_each_child_node(&pdev->dev, fwnode) { ret = fwnode_property_read_u32(fwnode, "reg", &channel); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } - if (channel >= RZG2L_ADC_MAX_CHANNELS) + if (channel >= RZG2L_ADC_MAX_CHANNELS) { + fwnode_handle_put(fwnode); return -EINVAL; + } chan_array[i].type = IIO_VOLTAGE; chan_array[i].indexed = 1; -- cgit v1.2.3 From 47dcf770abc793f347a65a24c24d550c936f08b0 Mon Sep 17 00:00:00 2001 From: Jialin Zhang Date: Tue, 17 May 2022 11:30:20 +0800 Subject: iio: adc: ti-ads131e08: add missing fwnode_handle_put() in ads131e08_alloc_channels() fwnode_handle_put() should be used when terminating device_for_each_child_node() iteration with break or return to prevent stale device node references from being left behind. Fixes: d935eddd2799 ("iio: adc: Add driver for Texas Instruments ADS131E0x ADC family") Reported-by: Hulk Robot Signed-off-by: Jialin Zhang Link: https://lore.kernel.org/r/20220517033020.2033324-1-zhangjialin11@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads131e08.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 0c2025a22575..80a09817c119 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -739,7 +739,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) device_for_each_child_node(dev, node) { ret = fwnode_property_read_u32(node, "reg", &channel); if (ret) - return ret; + goto err_child_out; ret = fwnode_property_read_u32(node, "ti,gain", &tmp); if (ret) { @@ -747,7 +747,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) } else { ret = ads131e08_pga_gain_to_field_value(st, tmp); if (ret < 0) - return ret; + goto err_child_out; channel_config[i].pga_gain = tmp; } @@ -758,7 +758,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) } else { ret = ads131e08_validate_channel_mux(st, tmp); if (ret) - return ret; + goto err_child_out; channel_config[i].mux = tmp; } @@ -784,6 +784,10 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev) st->channel_config = channel_config; return 0; + +err_child_out: + fwnode_handle_put(node); + return ret; } static void ads131e08_regulator_disable(void *data) -- cgit v1.2.3 From d2214cca4d3eadc74eac9e30301ec7cad5355f00 Mon Sep 17 00:00:00 2001 From: Yannick Brosseau Date: Mon, 16 May 2022 16:39:38 -0400 Subject: iio: adc: stm32: Fix ADCs iteration in irq handler The irq handler was only checking the mask for the first ADCs in the case of the F4 and H7 generation, since it was iterating up to the num_irq value. This patch add the maximum number of ADC in the common register, which map to the number of entries of eoc_msk and ovr_msk in stm32_adc_common_regs. This allow the handler to check all ADCs in that module. Tested on a STM32F429NIH6. Fixes: 695e2f5c289b ("iio: adc: stm32-adc: fix a regression when using dma and irq") Signed-off-by: Yannick Brosseau Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220516203939.3498673-2-yannick.brosseau@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 142656232157..bb04deeb7992 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -64,6 +64,7 @@ struct stm32_adc_priv; * @max_clk_rate_hz: maximum analog clock rate (Hz, from datasheet) * @has_syscfg: SYSCFG capability flags * @num_irqs: number of interrupt lines + * @num_adcs: maximum number of ADC instances in the common registers */ struct stm32_adc_priv_cfg { const struct stm32_adc_common_regs *regs; @@ -71,6 +72,7 @@ struct stm32_adc_priv_cfg { u32 max_clk_rate_hz; unsigned int has_syscfg; unsigned int num_irqs; + unsigned int num_adcs; }; /** @@ -352,7 +354,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) * before invoking the interrupt handler (e.g. call ISR only for * IRQ-enabled ADCs). */ - for (i = 0; i < priv->cfg->num_irqs; i++) { + for (i = 0; i < priv->cfg->num_adcs; i++) { if ((status & priv->cfg->regs->eoc_msk[i] && stm32_adc_eoc_enabled(priv, i)) || (status & priv->cfg->regs->ovr_msk[i])) @@ -792,6 +794,7 @@ static const struct stm32_adc_priv_cfg stm32f4_adc_priv_cfg = { .clk_sel = stm32f4_adc_clk_sel, .max_clk_rate_hz = 36000000, .num_irqs = 1, + .num_adcs = 3, }; static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { @@ -800,6 +803,7 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER, .num_irqs = 1, + .num_adcs = 2, }; static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { @@ -808,6 +812,7 @@ static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .max_clk_rate_hz = 40000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, + .num_adcs = 2, }; static const struct of_device_id stm32_adc_of_match[] = { -- cgit v1.2.3 From 99bded02dae5e1e2312813506c41dc8db2fb656c Mon Sep 17 00:00:00 2001 From: Yannick Brosseau Date: Mon, 16 May 2022 16:39:39 -0400 Subject: iio: adc: stm32: Fix IRQs on STM32F4 by removing custom spurious IRQs message The check for spurious IRQs introduced in 695e2f5c289bb assumed that the bits in the control and status registers are aligned. This is true for the H7 and MP1 version, but not the F4. The interrupt was then never handled on the F4. Instead of increasing the complexity of the comparison and check each bit specifically, we remove this check completely and rely on the generic handler for spurious IRQs. Fixes: 695e2f5c289b ("iio: adc: stm32-adc: fix a regression when using dma and irq") Signed-off-by: Yannick Brosseau Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220516203939.3498673-3-yannick.brosseau@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index a68ecbda6480..8c5f05f593ab 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1407,7 +1407,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); /* Check ovr status right now, as ovr mask should be already disabled */ if (status & regs->isr_ovr.mask) { @@ -1422,11 +1421,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) return IRQ_HANDLED; } - if (!(status & mask)) - dev_err_ratelimited(&indio_dev->dev, - "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n", - mask, status); - return IRQ_NONE; } @@ -1436,10 +1430,6 @@ static irqreturn_t stm32_adc_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); - - if (!(status & mask)) - return IRQ_WAKE_THREAD; if (status & regs->isr_ovr.mask) { /* -- cgit v1.2.3 From 8a2b6b5687984a010ed094b4f436a2f091987758 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 16 May 2022 11:52:02 +0400 Subject: iio: adc: aspeed: Fix refcount leak in aspeed_adc_set_trim_data of_find_node_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: d0a4c17b4073 ("iio: adc: aspeed: Get and set trimming data.") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220516075206.34580-1-linmq006@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/aspeed_adc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 0793d2474cdc..9341e0e0eb55 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -186,6 +186,7 @@ static int aspeed_adc_set_trim_data(struct iio_dev *indio_dev) return -EOPNOTSUPP; } scu = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(scu)) { dev_warn(data->dev, "Failed to get syscon regmap\n"); return -EOPNOTSUPP; -- cgit v1.2.3 From 9decacd8b3a432316d61c4366f302e63384cb08d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 24 May 2022 09:54:48 +0200 Subject: iio: afe: rescale: Fix boolean logic bug When introducing support for processed channels I needed to invert the expression: if (!iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || !iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) dev_err(dev, "source channel does not support raw/scale\n"); To the inverse, meaning detect when we can usse raw+scale rather than when we can not. This was the result: if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) dev_info(dev, "using raw+scale source channel\n"); Ooops. Spot the error. Yep old George Boole came up and bit me. That should be an &&. The current code "mostly works" because we have not run into systems supporting only raw but not scale or only scale but not raw, and I doubt there are few using the rescaler on anything such, but let's fix the logic. Cc: Liam Beguin Cc: stable@vger.kernel.org Fixes: 53ebee949980 ("iio: afe: iio-rescale: Support processed channels") Signed-off-by: Linus Walleij Reviewed-by: Liam Beguin Acked-by: Peter Rosin Link: https://lore.kernel.org/r/20220524075448.140238-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/afe/iio-rescale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index 7e511293d6d1..dc426e1484f0 100644 --- a/drivers/iio/afe/iio-rescale.c +++ b/drivers/iio/afe/iio-rescale.c @@ -278,7 +278,7 @@ static int rescale_configure_channel(struct device *dev, chan->ext_info = rescale->ext_info; chan->type = rescale->cfg->type; - if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || + if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) && iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) { dev_info(dev, "using raw+scale source channel\n"); } else if (iio_channel_has_info(schan, IIO_CHAN_INFO_PROCESSED)) { -- cgit v1.2.3 From e5f3205b04d7f95a2ef43bce4b454a7f264d6923 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:39 +0000 Subject: iio:accel:bma180: rearrange iio trigger get and register IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: 0668a4e4d297 ("iio: accel: bma180: Fix indio_dev->trig assignment") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-2-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma180.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 4f73bc827eec..9c9e98578667 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -1006,11 +1006,12 @@ static int bma180_probe(struct i2c_client *client, data->trig->ops = &bma180_trigger_ops; iio_trigger_set_drvdata(data->trig, indio_dev); - indio_dev->trig = iio_trigger_get(data->trig); ret = iio_trigger_register(data->trig); if (ret) goto err_trigger_free; + + indio_dev->trig = iio_trigger_get(data->trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, -- cgit v1.2.3 From ed302925d708f2f97ae5e9fd6c56c16bb34f6629 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:42 +0000 Subject: iio:accel:kxcjk-1013: rearrange iio trigger get and register IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: c1288b833881 ("iio: accel: kxcjk-1013: Increment ref counter for indio_dev->trig") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-3-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index ac74cdcd2bc8..748b35c2f0c3 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1554,12 +1554,12 @@ static int kxcjk1013_probe(struct i2c_client *client, data->dready_trig->ops = &kxcjk1013_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->dready_trig); if (ret) goto err_poweroff; + indio_dev->trig = iio_trigger_get(data->dready_trig); + data->motion_trig->ops = &kxcjk1013_trigger_ops; iio_trigger_set_drvdata(data->motion_trig, indio_dev); ret = iio_trigger_register(data->motion_trig); -- cgit v1.2.3 From 9354c224c9b4f55847a0de3e968cba2ebf15af3b Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:43 +0000 Subject: iio:accel:mxc4005: rearrange iio trigger get and register IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: 47196620c82f ("iio: mxc4005: add data ready trigger for mxc4005") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-4-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mxc4005.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index b3afbf064915..df600d2917c0 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -456,8 +456,6 @@ static int mxc4005_probe(struct i2c_client *client, data->dready_trig->ops = &mxc4005_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = devm_iio_trigger_register(&client->dev, data->dready_trig); if (ret) { @@ -465,6 +463,8 @@ static int mxc4005_probe(struct i2c_client *client, "failed to register trigger\n"); return ret; } + + indio_dev->trig = iio_trigger_get(data->dready_trig); } return devm_iio_device_register(&client->dev, indio_dev); -- cgit v1.2.3 From d710359c0b445e8c03e24f19ae2fb79ce7282260 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:45 +0000 Subject: iio:chemical:ccs811: rearrange iio trigger get and register IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: f1f065d7ac30 ("iio: chemical: ccs811: Add support for data ready trigger") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-5-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/ccs811.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 847194fa1e46..80ef1aa9aae3 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -499,11 +499,11 @@ static int ccs811_probe(struct i2c_client *client, data->drdy_trig->ops = &ccs811_trigger_ops; iio_trigger_set_drvdata(data->drdy_trig, indio_dev); - indio_dev->trig = data->drdy_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->drdy_trig); if (ret) goto err_poweroff; + + indio_dev->trig = iio_trigger_get(data->drdy_trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, -- cgit v1.2.3 From 10b9c2c33ac706face458feab8965f11743c98c0 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 24 May 2022 18:14:46 +0000 Subject: iio:humidity:hts221: rearrange iio trigger get and register IIO trigger interface function iio_trigger_get() should be called after iio_trigger_register() (or its devm analogue) strictly, because of iio_trigger_get() acquires module refcnt based on the trigger->owner pointer, which is initialized inside iio_trigger_register() to THIS_MODULE. If this call order is wrong, the next iio_trigger_put() (from sysfs callback or "delete module" path) will dereference "default" module refcnt, which is incorrect behaviour. Fixes: e4a70e3e7d84 ("iio: humidity: add support to hts221 rh/temp combo device") Signed-off-by: Dmitry Rokosov Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220524181150.9240-6-ddrokosov@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hts221_buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hts221_buffer.c b/drivers/iio/humidity/hts221_buffer.c index f29692b9d2db..66b32413cf5e 100644 --- a/drivers/iio/humidity/hts221_buffer.c +++ b/drivers/iio/humidity/hts221_buffer.c @@ -135,9 +135,12 @@ int hts221_allocate_trigger(struct iio_dev *iio_dev) iio_trigger_set_drvdata(hw->trig, iio_dev); hw->trig->ops = &hts221_trigger_ops; + + err = devm_iio_trigger_register(hw->dev, hw->trig); + iio_dev->trig = iio_trigger_get(hw->trig); - return devm_iio_trigger_register(hw->dev, hw->trig); + return err; } static int hts221_buffer_preenable(struct iio_dev *iio_dev) -- cgit v1.2.3 From 7a2f6f61e8ee016b75e1b1dd62fbd03e6d6db37d Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Wed, 1 Jun 2022 10:21:38 -0400 Subject: iio: test: fix missing MODULE_LICENSE for IIO_RESCALE=m When IIO_RESCALE_KUNIT_TEST=y and IIO_RESCALE=m, drivers/iio/afe/iio-rescale.o is built twice causing the MODULE_LICENSE() to be lost, as shown by: ERROR: modpost: missing MODULE_LICENSE() in drivers/iio/afe/iio-rescale.o Rework the build configuration to have the dependency specified in the Kconfig. Reported-by: Randy Dunlap Fixes: 8e74a48d17d5 ("iio: test: add basic tests for the iio-rescale driver") Signed-off-by: Liam Beguin Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Masahiro Yamada Link: https://lore.kernel.org/r/20220601142138.3331278-1-liambeguin@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/test/Kconfig | 2 +- drivers/iio/test/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/test/Kconfig b/drivers/iio/test/Kconfig index 56ca0ad7e77a..4c66c3f18c34 100644 --- a/drivers/iio/test/Kconfig +++ b/drivers/iio/test/Kconfig @@ -6,7 +6,7 @@ # Keep in alphabetical order config IIO_RESCALE_KUNIT_TEST bool "Test IIO rescale conversion functions" - depends on KUNIT=y && !IIO_RESCALE + depends on KUNIT=y && IIO_RESCALE=y default KUNIT_ALL_TESTS help If you want to run tests on the iio-rescale code say Y here. diff --git a/drivers/iio/test/Makefile b/drivers/iio/test/Makefile index f15ae0a6394f..880360f8d02c 100644 --- a/drivers/iio/test/Makefile +++ b/drivers/iio/test/Makefile @@ -4,6 +4,6 @@ # # Keep in alphabetical order -obj-$(CONFIG_IIO_RESCALE_KUNIT_TEST) += iio-test-rescale.o ../afe/iio-rescale.o +obj-$(CONFIG_IIO_RESCALE_KUNIT_TEST) += iio-test-rescale.o obj-$(CONFIG_IIO_TEST_FORMAT) += iio-test-format.o CFLAGS_iio-test-format.o += $(DISABLE_STRUCTLEAK_PLUGIN) -- cgit v1.2.3 From ada7b0c0dedafd7d059115adf49e48acba3153a8 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 24 May 2022 11:45:17 +0400 Subject: iio: adc: adi-axi-adc: Fix refcount leak in adi_axi_adc_attach_client of_parse_phandle() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. Add missing of_node_put() to avoid refcount leak. Fixes: ef04070692a2 ("iio: adc: adi-axi-adc: add support for AXI ADC IP core") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220524074517.45268-1-linmq006@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a73e3c2d212f..a9e655e69eaa 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -322,16 +322,19 @@ static struct adi_axi_adc_client *adi_axi_adc_attach_client(struct device *dev) if (!try_module_get(cl->dev->driver->owner)) { mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-ENODEV); } get_device(cl->dev); cl->info = info; mutex_unlock(®istered_clients_lock); + of_node_put(cln); return cl; } mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-EPROBE_DEFER); } -- cgit v1.2.3 From f1a633b15cd5371a2a83f02c513984e51132dd68 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 30 May 2022 11:50:26 +0300 Subject: iio: adc: vf610: fix conversion mode sysfs node name The documentation missed the "in_" prefix for this IIO_SHARED_BY_DIR entry. Fixes: bf04c1a367e3 ("iio: adc: vf610: implement configurable conversion modes") Signed-off-by: Baruch Siach Acked-by: Haibo Chen Link: https://lore.kernel.org/r/560dc93fafe5ef7e9a409885fd20b6beac3973d8.1653900626.git.baruch@tkos.co.il Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio-vf610 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf..491ead804488 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: -- cgit v1.2.3 From 106b391e1b859100a3f38f0ad874236e9be06bde Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Thu, 9 Jun 2022 12:23:01 +0200 Subject: iio: imu: inv_icm42600: Fix broken icm42600 (chip id 0 value) The 0 value used for INV_CHIP_ICM42600 was not working since the match in i2c/spi was checking against NULL value. To keep this check, add a first INV_CHIP_INVALID 0 value as safe guard. Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20220609102301.4794-1-jmaneyrol@invensense.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600.h | 1 + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index c0f5059b13b3..995a9dc06521 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -17,6 +17,7 @@ #include "inv_icm42600_buffer.h" enum inv_icm42600_chip { + INV_CHIP_INVALID, INV_CHIP_ICM42600, INV_CHIP_ICM42602, INV_CHIP_ICM42605, diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index 86858da9cc38..ca85fccc9839 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -565,7 +565,7 @@ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, bool open_drain; int ret; - if (chip < 0 || chip >= INV_CHIP_NB) { + if (chip <= INV_CHIP_INVALID || chip >= INV_CHIP_NB) { dev_err(dev, "invalid chip = %d\n", chip); return -ENODEV; } -- cgit v1.2.3 From bc05f30fc24705cd023f38659303376eaa5767df Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 9 Jun 2022 11:58:56 +0200 Subject: iio: adc: stm32: fix vrefint wrong calibration value handling If the vrefint calibration is zero, the vrefint channel output value cannot be computed. Currently, in such case, the raw conversion value is returned, which is not relevant. Do not expose the vrefint channel when the output value cannot be computed, instead. Fixes: 0e346b2cfa85 ("iio: adc: stm32-adc: add vrefint calibration support") Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220609095856.376961-1-olivier.moysan@foss.st.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 8c5f05f593ab..11ef873d6453 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1365,7 +1365,7 @@ static int stm32_adc_read_raw(struct iio_dev *indio_dev, else ret = -EINVAL; - if (mask == IIO_CHAN_INFO_PROCESSED && adc->vrefint.vrefint_cal) + if (mask == IIO_CHAN_INFO_PROCESSED) *val = STM32_ADC_VREFINT_VOLTAGE * adc->vrefint.vrefint_cal / *val; iio_device_release_direct_mode(indio_dev); @@ -1969,10 +1969,10 @@ static int stm32_adc_populate_int_ch(struct iio_dev *indio_dev, const char *ch_n for (i = 0; i < STM32_ADC_INT_CH_NB; i++) { if (!strncmp(stm32_adc_ic[i].name, ch_name, STM32_ADC_CH_SZ)) { - adc->int_ch[i] = chan; - - if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT) - continue; + if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT) { + adc->int_ch[i] = chan; + break; + } /* Get calibration data for vrefint channel */ ret = nvmem_cell_read_u16(&indio_dev->dev, "vrefint", &vrefint); @@ -1980,10 +1980,15 @@ static int stm32_adc_populate_int_ch(struct iio_dev *indio_dev, const char *ch_n return dev_err_probe(indio_dev->dev.parent, ret, "nvmem access error\n"); } - if (ret == -ENOENT) - dev_dbg(&indio_dev->dev, "vrefint calibration not found\n"); - else - adc->vrefint.vrefint_cal = vrefint; + if (ret == -ENOENT) { + dev_dbg(&indio_dev->dev, "vrefint calibration not found. Skip vrefint channel\n"); + return ret; + } else if (!vrefint) { + dev_dbg(&indio_dev->dev, "Null vrefint calibration value. Skip vrefint channel\n"); + return -ENOENT; + } + adc->int_ch[i] = chan; + adc->vrefint.vrefint_cal = vrefint; } } @@ -2020,7 +2025,9 @@ static int stm32_adc_generic_chan_init(struct iio_dev *indio_dev, } strncpy(adc->chan_name[val], name, STM32_ADC_CH_SZ); ret = stm32_adc_populate_int_ch(indio_dev, name, val); - if (ret) + if (ret == -ENOENT) + continue; + else if (ret) goto err; } else if (ret != -EINVAL) { dev_err(&indio_dev->dev, "Invalid label %d\n", ret); -- cgit v1.2.3 From 990539486e7e311fb5dab1bf4d85d1a8973ae644 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 9 Jun 2022 11:52:34 +0200 Subject: iio: adc: stm32: fix maximum clock rate for stm32mp15x Change maximum STM32 ADC input clock rate to 36MHz, as specified in STM32MP15x datasheets. Fixes: d58c67d1d851 ("iio: adc: stm32-adc: add support for STM32MP1") Signed-off-by: Olivier Moysan Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20220609095234.375925-1-olivier.moysan@foss.st.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index bb04deeb7992..3efb8c404ccc 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -809,7 +809,7 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .regs = &stm32h7_adc_common_regs, .clk_sel = stm32h7_adc_clk_sel, - .max_clk_rate_hz = 40000000, + .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, .num_adcs = 2, -- cgit v1.2.3 From bf745142cc0a3e1723f9207fb0c073c88464b7b4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Jun 2022 19:31:58 +0800 Subject: iio: accel: mma8452: ignore the return value of reset operation On fxls8471, after set the reset bit, the device will reset immediately, will not give ACK. So ignore the return value of this reset operation, let the following code logic to check whether the reset operation works. Signed-off-by: Haibo Chen Fixes: ecabae713196 ("iio: mma8452: Initialise before activating") Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/1655292718-14287-1-git-send-email-haibo.chen@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 4156d216c640..f4f835274d75 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1510,10 +1510,14 @@ static int mma8452_reset(struct i2c_client *client) int i; int ret; - ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, + /* + * Find on fxls8471, after config reset bit, it reset immediately, + * and will not give ACK, so here do not check the return value. + * The following code will read the reset register, and check whether + * this reset works. + */ + i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, MMA8452_CTRL_REG2_RST); - if (ret < 0) - return ret; for (i = 0; i < 10; i++) { usleep_range(100, 200); -- cgit v1.2.3 From 70171ed6dc53d2f580166d47f5b66cf51a6d0092 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Mon, 13 Jun 2022 16:22:24 -0700 Subject: iio:proximity:sx9324: Check ret value of device_property_read_u32_array() 0-day reports: drivers/iio/proximity/sx9324.c:868:3: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores] Put an if condition to break out of switch if ret is non-zero. Signed-off-by: Aashish Sharma Fixes: a8ee3b32f5da ("iio:proximity:sx9324: Add dt_binding support") Reported-by: kernel test robot [swboyd@chromium.org: Reword commit subject, add fixes tag] Signed-off-by: Stephen Boyd Reviewed-by: Gwendal Grignou Link: https://lore.kernel.org/r/20220613232224.2466278-1-swboyd@chromium.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9324.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index 70c37f664f6d..63fbcaa4cac8 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -885,6 +885,9 @@ sx9324_get_default_reg(struct device *dev, int idx, break; ret = device_property_read_u32_array(dev, prop, pin_defs, ARRAY_SIZE(pin_defs)); + if (ret) + break; + for (pin = 0; pin < SX9324_NUM_PINS; pin++) raw |= (pin_defs[pin] << (2 * pin)) & SX9324_REG_AFE_PH0_PIN_MASK(pin); -- cgit v1.2.3 From 534d2eaf1970274150596fdd2bf552721e65d6b2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Jun 2022 02:03:12 +0200 Subject: random: schedule mix_interrupt_randomness() less often It used to be that mix_interrupt_randomness() would credit 1 bit each time it ran, and so add_interrupt_randomness() would schedule mix() to run every 64 interrupts, a fairly arbitrary number, but nonetheless considered to be a decent enough conservative estimate. Since e3e33fc2ea7f ("random: do not use input pool from hard IRQs"), mix() is now able to credit multiple bits, depending on the number of calls to add(). This was done for reasons separate from this commit, but it has the nice side effect of enabling this patch to schedule mix() less often. Currently the rules are: a) Credit 1 bit for every 64 calls to add(). b) Schedule mix() once a second that add() is called. c) Schedule mix() once every 64 calls to add(). Rules (a) and (c) no longer need to be coupled. It's still important to have _some_ value in (c), so that we don't "over-saturate" the fast pool, but the once per second we get from rule (b) is a plenty enough baseline. So, by increasing the 64 in rule (c) to something larger, we avoid calling queue_work_on() as frequently during irq storms. This commit changes that 64 in rule (c) to be 1024, which means we schedule mix() 16 times less often. And it does *not* need to change the 64 in rule (a). Fixes: 58340f8e952b ("random: defer fast pool mixing to worker") Cc: stable@vger.kernel.org Cc: Dominik Brodowski Acked-by: Sebastian Andrzej Siewior Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 655e327d425e..d0e4c89c4fcb 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1009,7 +1009,7 @@ void add_interrupt_randomness(int irq) if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ)) + if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; if (unlikely(!fast_pool->mix.func)) -- cgit v1.2.3 From c01d4d0a82b71857be7449380338bc53dde2da92 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Jun 2022 15:00:51 +0200 Subject: random: quiet urandom warning ratelimit suppression message random.c ratelimits how much it warns about uninitialized urandom reads using __ratelimit(). When the RNG is finally initialized, it prints the number of missed messages due to ratelimiting. It has been this way since that functionality was introduced back in 2018. Recently, cc1e127bfa95 ("random: remove ratelimiting for in-kernel unseeded randomness") put a bit more stress on the urandom ratelimiting, which teased out a bug in the implementation. Specifically, when under pressure, __ratelimit() will print its own message and reset the count back to 0, making the final message at the end less useful. Secondly, it does so as a pr_warn(), which apparently is undesirable for people's CI. Fortunately, __ratelimit() has the RATELIMIT_MSG_ON_RELEASE flag exactly for this purpose, so we set the flag. Fixes: 4e00b339e264 ("random: rate limit unseeded randomness warnings") Cc: stable@vger.kernel.org Reported-by: Jon Hunter Reported-by: Ron Economos Tested-by: Ron Economos Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- include/linux/ratelimit_types.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index d0e4c89c4fcb..07a022e24057 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -87,7 +87,7 @@ static struct fasync_struct *fasync; /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); + RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index c21c7f8103e2..002266693e50 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -23,12 +23,16 @@ struct ratelimit_state { unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) -- cgit v1.2.3 From 4cde00d50707c2ef6647b9b96b2cb40b6eb24397 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 May 2022 18:27:09 -0700 Subject: f2fs: attach inline_data after setting compression This fixes the below corruption. [345393.335389] F2FS-fs (vdb): sanity_check_inode: inode (ino=6d0, mode=33206) should not have inline_data, run fsck to fix Cc: Fixes: 677a82b44ebf ("f2fs: fix to do sanity check for inline inode") Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c549acb52ac4..bf00d5057abb 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -89,8 +89,6 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); - if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); @@ -107,10 +105,6 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, f2fs_init_extent_tree(inode, NULL); - stat_inc_inline_xattr(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); - F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); @@ -127,6 +121,14 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, set_compress_context(inode); } + /* Should enable inline_data after compression set */ + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) + set_inode_flag(inode, FI_INLINE_DATA); + + stat_inc_inline_xattr(inode); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -325,6 +327,9 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, if (!is_extension_exist(name, ext[i], false)) continue; + /* Do not use inline_data with compression */ + stat_dec_inline_inode(inode); + clear_inode_flag(inode, FI_INLINE_DATA); set_compress_context(inode); return; } -- cgit v1.2.3 From 61803e984307c767a96d85f3b61ca50e1705fc67 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 10 Jun 2022 11:32:40 -0700 Subject: f2fs: fix iostat related lock protection Made iostat related locks safe to be called from irq context again. Cc: Fixes: a1e09b03e6f5 ("f2fs: use iomap for direct I/O") Signed-off-by: Daeho Jeong Reviewed-by: Stanley Chu Tested-by: Eddie Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/iostat.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index be599f31d3c4..d84c5f6cc09d 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -91,8 +91,9 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) unsigned int cnt; struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; struct iostat_lat_info *io_lat = sbi->iostat_io_lat; + unsigned long flags; - spin_lock_bh(&sbi->iostat_lat_lock); + spin_lock_irqsave(&sbi->iostat_lat_lock, flags); for (idx = 0; idx < MAX_IO_TYPE; idx++) { for (io = 0; io < NR_PAGE_TYPE; io++) { cnt = io_lat->bio_cnt[idx][io]; @@ -106,7 +107,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) io_lat->bio_cnt[idx][io] = 0; } } - spin_unlock_bh(&sbi->iostat_lat_lock); + spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags); trace_f2fs_iostat_latency(sbi, iostat_lat); } @@ -115,14 +116,15 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) { unsigned long long iostat_diff[NR_IO_TYPE]; int i; + unsigned long flags; if (time_is_after_jiffies(sbi->iostat_next_period)) return; /* Need double check under the lock */ - spin_lock_bh(&sbi->iostat_lock); + spin_lock_irqsave(&sbi->iostat_lock, flags); if (time_is_after_jiffies(sbi->iostat_next_period)) { - spin_unlock_bh(&sbi->iostat_lock); + spin_unlock_irqrestore(&sbi->iostat_lock, flags); return; } sbi->iostat_next_period = jiffies + @@ -133,7 +135,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) sbi->prev_rw_iostat[i]; sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; } - spin_unlock_bh(&sbi->iostat_lock); + spin_unlock_irqrestore(&sbi->iostat_lock, flags); trace_f2fs_iostat(sbi, iostat_diff); @@ -145,25 +147,27 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi) struct iostat_lat_info *io_lat = sbi->iostat_io_lat; int i; - spin_lock_bh(&sbi->iostat_lock); + spin_lock_irq(&sbi->iostat_lock); for (i = 0; i < NR_IO_TYPE; i++) { sbi->rw_iostat[i] = 0; sbi->prev_rw_iostat[i] = 0; } - spin_unlock_bh(&sbi->iostat_lock); + spin_unlock_irq(&sbi->iostat_lock); - spin_lock_bh(&sbi->iostat_lat_lock); + spin_lock_irq(&sbi->iostat_lat_lock); memset(io_lat, 0, sizeof(struct iostat_lat_info)); - spin_unlock_bh(&sbi->iostat_lat_lock); + spin_unlock_irq(&sbi->iostat_lat_lock); } void f2fs_update_iostat(struct f2fs_sb_info *sbi, enum iostat_type type, unsigned long long io_bytes) { + unsigned long flags; + if (!sbi->iostat_enable) return; - spin_lock_bh(&sbi->iostat_lock); + spin_lock_irqsave(&sbi->iostat_lock, flags); sbi->rw_iostat[type] += io_bytes; if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO) @@ -172,7 +176,7 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO) sbi->rw_iostat[APP_READ_IO] += io_bytes; - spin_unlock_bh(&sbi->iostat_lock); + spin_unlock_irqrestore(&sbi->iostat_lock, flags); f2fs_record_iostat(sbi); } @@ -185,6 +189,7 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx, struct f2fs_sb_info *sbi = iostat_ctx->sbi; struct iostat_lat_info *io_lat = sbi->iostat_io_lat; int idx; + unsigned long flags; if (!sbi->iostat_enable) return; @@ -202,12 +207,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx, idx = WRITE_ASYNC_IO; } - spin_lock_bh(&sbi->iostat_lat_lock); + spin_lock_irqsave(&sbi->iostat_lat_lock, flags); io_lat->sum_lat[idx][iotype] += ts_diff; io_lat->bio_cnt[idx][iotype]++; if (ts_diff > io_lat->peak_lat[idx][iotype]) io_lat->peak_lat[idx][iotype] = ts_diff; - spin_unlock_bh(&sbi->iostat_lat_lock); + spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags); } void iostat_update_and_unbind_ctx(struct bio *bio, int rw) -- cgit v1.2.3 From 28438794aba47a27e922857d27b31b74e8559143 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 11 Jun 2022 03:32:30 +0900 Subject: modpost: fix section mismatch check for exported init/exit sections Since commit f02e8a6596b7 ("module: Sort exported symbols"), EXPORT_SYMBOL* is placed in the individual section ___ksymtab(_gpl)+ (3 leading underscores instead of 2). Since then, modpost cannot detect the bad combination of EXPORT_SYMBOL and __init/__exit. Fix the .fromsec field. Fixes: f02e8a6596b7 ("module: Sort exported symbols") Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 29d5a841e215..620dc8c4c814 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -980,7 +980,7 @@ static const struct sectioncheck sectioncheck[] = { }, /* Do not export init/exit functions or data */ { - .fromsec = { "__ksymtab*", NULL }, + .fromsec = { "___ksymtab*", NULL }, .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL }, .mismatch = EXPORT_TO_INIT_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, -- cgit v1.2.3 From 291810be4227564403807e663f3ec8d3b3d6ba34 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 17 Jun 2022 09:58:17 -0700 Subject: Documentation/llvm: Update Supported Arch table While watching Michael's new talk on Clang-built-Linux, I noticed the arch table in our docs that he refers to is outdated. Add hexagon and User Mode. Bump MIPS and RISCV to LLVM=1. PowerPC is almost LLVM=1 capable; ppc64le works, but ppc64 (big endian) and ppc32 still need more work. Link: https://youtu.be/W4zdEDpvR5c?t=399 Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index b854bb413164..6b2bac8e9ce0 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -129,18 +129,24 @@ yet. Bug reports are always welcome at the issue tracker below! * - arm64 - Supported - ``LLVM=1`` + * - hexagon + - Maintained + - ``LLVM=1`` * - mips - Maintained - - ``CC=clang`` + - ``LLVM=1`` * - powerpc - Maintained - ``CC=clang`` * - riscv - Maintained - - ``CC=clang`` + - ``LLVM=1`` * - s390 - Maintained - ``CC=clang`` + * - um (User Mode) + - Maintained + - ``LLVM=1`` * - x86 - Supported - ``LLVM=1`` -- cgit v1.2.3 From 9243fc4cd28c8bdddd7fe0abd5bbec3c4fdf5052 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 3 Jun 2022 14:35:29 +0900 Subject: block: remove queue from struct blk_independent_access_range The request queue pointer in struct blk_independent_access_range is unused. Remove it. Signed-off-by: Damien Le Moal Fixes: 41e46b3c2aa2 ("block: Fix potential deadlock in blk_ia_range_sysfs_show()") Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220603053529.76405-1-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe --- block/blk-ia-ranges.c | 1 - include/linux/blkdev.h | 1 - 2 files changed, 2 deletions(-) diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index 56ed48d2954e..47c89e65b57f 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk, } for (i = 0; i < iars->nr_ia_ranges; i++) { - iars->ia_range[i].queue = q; ret = kobject_init_and_add(&iars->ia_range[i].kobj, &blk_ia_range_ktype, &iars->kobj, "%d", i); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 73c886eba8e1..2f7b43444c5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -342,7 +342,6 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, */ struct blk_independent_access_range { struct kobject kobj; - struct request_queue *queue; sector_t sector; sector_t nr_sectors; }; -- cgit v1.2.3 From 63b8ea5e4f1a87dea4d3114293fc8e96a8f193d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 20 Jun 2022 11:03:48 +0200 Subject: random: update comment from copy_to_user() -> copy_to_iter() This comment wasn't updated when we moved from read() to read_iter(), so this patch makes the trivial fix. Fixes: 1b388e7765f2 ("random: convert to using fops->read_iter()") Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 07a022e24057..e3dd1dd3dd22 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -408,7 +408,7 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter) /* * Immediately overwrite the ChaCha key at index 4 with random - * bytes, in case userspace causes copy_to_user() below to sleep + * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); -- cgit v1.2.3 From a2d9b75b19dc8863f0845ffb401d33b2286d0aa1 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 20 Jun 2022 02:45:10 -0700 Subject: xtensa: change '.bss' to '.section .bss' For some reason (ancient assembler?) the following build error is reported by the kisskb: kisskb/src/arch/xtensa/kernel/entry.S: Error: unknown pseudo-op: `.bss': => 2176 Change abbreviated '.bss' to the full '.section .bss, "aw"' to fix this error. Reported-by: Geert Uytterhoeven Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e3eae648ba2e..ab30bcb46290 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -2173,7 +2173,7 @@ ENDPROC(ret_from_kernel_thread) #ifdef CONFIG_HIBERNATION - .bss + .section .bss, "aw" .align 4 .Lsaved_regs: #if defined(__XTENSA_WINDOWED_ABI__) -- cgit v1.2.3 From 13bd259b64bb58ae130923ada42ebc19bf3f2fa2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Jun 2022 23:14:39 +0300 Subject: drm/i915: Implement w/a 22010492432 for adl-s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adl-s needs the combo PLL DCO fraction w/a as well. Gets us slightly more accurate clock out of the PLL. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 22f55574a35c..88c2f38aa870 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2396,7 +2396,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params, } /* - * Display WA #22010492432: ehl, tgl, adl-p + * Display WA #22010492432: ehl, tgl, adl-s, adl-p * Program half of the nominal DCO divider fraction value. */ static bool @@ -2404,7 +2404,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || - IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && + IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } -- cgit v1.2.3 From 3828296ad6242c25d2679d32a377b8e07c6b08c0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Jun 2022 15:00:56 +0100 Subject: drm/i915/fdinfo: Don't show engine classes not present Stop displaying engine classes with no engines - it is not a huge problem if they are shown, since the values will correctly be all zeroes, but it does count as misleading. Signed-off-by: Tvrtko Ursulin Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via fdinfo") Cc: Umesh Nerlige Ramappa Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20220616140056.559074-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 9f1b1d0b2242171b2891a0398def233801601c14) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drm_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 18d38cb59923..b09d1d386574 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -116,8 +116,9 @@ show_client_class(struct seq_file *m, total += busy_add(ctx, class); rcu_read_unlock(); - seq_printf(m, "drm-engine-%s:\t%llu ns\n", - uabi_class_names[class], total); + if (capacity) + seq_printf(m, "drm-engine-%s:\t%llu ns\n", + uabi_class_names[class], total); if (capacity > 1) seq_printf(m, "drm-engine-capacity-%s:\t%u\n", -- cgit v1.2.3 From 342fc0c3b345525da21112bd0478a0dc741598ea Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 14 Jun 2022 09:56:23 +0200 Subject: USB: serial: option: add Telit LE910Cx 0x1250 composition Add support for the following Telit LE910Cx composition: 0x1250: rmnet, tty, tty, tty, tty Reviewed-by: Daniele Palmas Signed-off-by: Carlo Lobrano Link: https://lore.kernel.org/r/20220614075623.2392607-1-c.lobrano@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ed1e50d83cca..222b1e3d45a6 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1279,6 +1279,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), -- cgit v1.2.3 From 419bc8f681a0dc63588cee693b6d45e7caa6006c Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Fri, 17 Jun 2022 20:42:51 +0800 Subject: spi: rockchip: Unmask IRQ at the final to avoid preemption Avoid pio_write process is preempted, resulting in abnormal state. Signed-off-by: Jon Lin Signed-off-by: Jon Link: https://lore.kernel.org/r/20220617124251.5051-1-jon.lin@rock-chips.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index a08215eb9e14..79242dc5272d 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -381,15 +381,18 @@ static int rockchip_spi_prepare_irq(struct rockchip_spi *rs, rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0; rs->rx_left = xfer->len / rs->n_bytes; - if (rs->cs_inactive) - writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR); - else - writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR); + writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR); + spi_enable_chip(rs, true); if (rs->tx_left) rockchip_spi_pio_writer(rs); + if (rs->cs_inactive) + writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR); + else + writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR); + /* 1 means the transfer is in progress */ return 1; } -- cgit v1.2.3 From fc378794a2f7a19cf26010dc33b89ba608d4c70f Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Sun, 5 Jun 2022 16:59:13 +0800 Subject: video: fbdev: skeletonfb: Fix syntax errors in comments Delete the redundant word 'its'. Signed-off-by: Xiang wangx Signed-off-by: Helge Deller --- drivers/video/fbdev/skeletonfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c index bcacfb6934fa..3d4d78362ede 100644 --- a/drivers/video/fbdev/skeletonfb.c +++ b/drivers/video/fbdev/skeletonfb.c @@ -96,7 +96,7 @@ static const struct fb_fix_screeninfo xxxfb_fix = { /* * Modern graphical hardware not only supports pipelines but some - * also support multiple monitors where each display can have its + * also support multiple monitors where each display can have * its own unique data. In this case each display could be * represented by a separate framebuffer device thus a separate * struct fb_info. Now the struct xxx_par represents the graphics -- cgit v1.2.3 From 25c9a15fb7bbfafb94dd3b4e3165c18b8e1bd039 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 17 Jun 2022 15:38:04 +0200 Subject: video: fbdev: intelfb: Use aperture size from pci_resource_len Aperture size for i9x5 variants is determined from PCI base address. if (pci_resource_start(pdev, 2) & 0x08000000) *aperture_size = MB(128); ... This condition is incorrect as 128 MiB address can have the address set as 0x?8000000 or 0x?0000000. Also the code can be simplified to just use pci_resource_len(). The true settings of the aperture size is in the MSAC register, which could be used instead. However the value is used only as an info message, so it doesn't matter. Signed-off-by: Petr Cvek Signed-off-by: Helge Deller --- drivers/video/fbdev/intelfb/intelfbhw.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/intelfb/intelfbhw.c b/drivers/video/fbdev/intelfb/intelfbhw.c index 57aff7450bce..2086e06532ee 100644 --- a/drivers/video/fbdev/intelfb/intelfbhw.c +++ b/drivers/video/fbdev/intelfb/intelfbhw.c @@ -201,13 +201,11 @@ int intelfbhw_get_memory(struct pci_dev *pdev, int *aperture_size, case PCI_DEVICE_ID_INTEL_945GME: case PCI_DEVICE_ID_INTEL_965G: case PCI_DEVICE_ID_INTEL_965GM: - /* 915, 945 and 965 chipsets support a 256MB aperture. - Aperture size is determined by inspected the - base address of the aperture. */ - if (pci_resource_start(pdev, 2) & 0x08000000) - *aperture_size = MB(128); - else - *aperture_size = MB(256); + /* + * 915, 945 and 965 chipsets support 64MB, 128MB or 256MB + * aperture. Determine size from PCI resource length. + */ + *aperture_size = pci_resource_len(pdev, 2); break; default: if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M) -- cgit v1.2.3 From d36a869e0d0e56439365ed6d836480c480470e88 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 17 Jun 2022 15:38:13 +0200 Subject: video: fbdev: intelfb: Initialize value of stolen size Variable stolen_size can be left uninitialized in a code path with INTEL_855_GMCH_GMS_DISABLED. Fix this by initializing the variable to 0. Also fix indentation of function arguments. Signed-off-by: Petr Cvek Signed-off-by: Helge Deller --- drivers/video/fbdev/intelfb/intelfbdrv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index a9579964eaba..5647fca8c49a 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -472,7 +472,7 @@ static int intelfb_pci_register(struct pci_dev *pdev, struct fb_info *info; struct intelfb_info *dinfo; int i, err, dvo; - int aperture_size, stolen_size; + int aperture_size, stolen_size = 0; struct agp_kern_info gtt_info; int agp_memtype; const char *s; @@ -571,7 +571,7 @@ static int intelfb_pci_register(struct pci_dev *pdev, return -ENODEV; } - if (intelfbhw_get_memory(pdev, &aperture_size,&stolen_size)) { + if (intelfbhw_get_memory(pdev, &aperture_size, &stolen_size)) { cleanup(dinfo); return -ENODEV; } -- cgit v1.2.3 From e146a096217e335f4e297a4fbba7ce6c722a1115 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Jun 2022 18:11:11 -0500 Subject: video: fbdev: cirrusfb: Remove useless reference to PCI power management PCI-specific power management (pci_driver.suspend and pci_driver.resume) is deprecated. The cirrusfb driver has never implemented power management at all, but if it ever does, it should use the generic power management framework, not the PCI-specific hooks. Remove the commented-out references to the PCI-specific power management hooks. Signed-off-by: Bjorn Helgaas Acked-by: Daniel Vetter Signed-off-by: Helge Deller --- drivers/video/fbdev/cirrusfb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index 3d47c347b897..51e072c03e1c 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -2184,12 +2184,6 @@ static struct pci_driver cirrusfb_pci_driver = { .id_table = cirrusfb_pci_table, .probe = cirrusfb_pci_register, .remove = cirrusfb_pci_unregister, -#ifdef CONFIG_PM -#if 0 - .suspend = cirrusfb_pci_suspend, - .resume = cirrusfb_pci_resume, -#endif -#endif }; #endif /* CONFIG_PCI */ -- cgit v1.2.3 From 267173cbf4a6b37599e644098c756e7e4b771fe9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Jun 2022 18:11:12 -0500 Subject: video: fbdev: skeletonfb: Convert to generic power management PCI-specific power management (pci_driver.suspend and pci_driver.resume) is deprecated. If drivers implement power management, they should use the generic power management framework, not the PCI-specific hooks. Convert the sample code to use the generic power management framework. Signed-off-by: Bjorn Helgaas Acked-by: Daniel Vetter Signed-off-by: Helge Deller --- drivers/video/fbdev/skeletonfb.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c index 3d4d78362ede..d119b1d08007 100644 --- a/drivers/video/fbdev/skeletonfb.c +++ b/drivers/video/fbdev/skeletonfb.c @@ -838,9 +838,9 @@ static void xxxfb_remove(struct pci_dev *dev) * * See Documentation/driver-api/pm/devices.rst for more information */ -static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg) +static int xxxfb_suspend(struct device *dev) { - struct fb_info *info = pci_get_drvdata(dev); + struct fb_info *info = dev_get_drvdata(dev); struct xxxfb_par *par = info->par; /* suspend here */ @@ -853,9 +853,9 @@ static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg) * * See Documentation/driver-api/pm/devices.rst for more information */ -static int xxxfb_resume(struct pci_dev *dev) +static int xxxfb_resume(struct device *dev) { - struct fb_info *info = pci_get_drvdata(dev); + struct fb_info *info = dev_get_drvdata(dev); struct xxxfb_par *par = info->par; /* resume here */ @@ -873,14 +873,15 @@ static const struct pci_device_id xxxfb_id_table[] = { { 0, } }; +static SIMPLE_DEV_PM_OPS(xxxfb_pm_ops, xxxfb_suspend, xxxfb_resume); + /* For PCI drivers */ static struct pci_driver xxxfb_driver = { .name = "xxxfb", .id_table = xxxfb_id_table, .probe = xxxfb_probe, .remove = xxxfb_remove, - .suspend = xxxfb_suspend, /* optional but recommended */ - .resume = xxxfb_resume, /* optional but recommended */ + .driver.pm = xxxfb_pm_ops, /* optional but recommended */ }; MODULE_DEVICE_TABLE(pci, xxxfb_id_table); -- cgit v1.2.3 From 1bacd264d3c3a05de4afdd1712c9dd6ccebb9490 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Jun 2022 06:39:27 -0600 Subject: io_uring: mark reissue requests with REQ_F_PARTIAL_IO If we mark for reissue, we assume that the buffer will remain stable. Hence if are using a provided buffer, we need to ensure that we stick with it for the duration of that request. This only affects block devices that use provided buffers, as those are the only ones that get marked with REQ_F_REISSUE. Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d3ee4fc532fa..87c65a358678 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3437,7 +3437,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res) if (unlikely(res != req->cqe.res)) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { - req->flags |= REQ_F_REISSUE; + req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; return true; } req_set_fail(req); @@ -3487,7 +3487,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) kiocb_end_write(req); if (unlikely(res != req->cqe.res)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { - req->flags |= REQ_F_REISSUE; + req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; return; } req->cqe.res = res; -- cgit v1.2.3 From ea50e2a1540fd94e6439a961daae595f65e574fb Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 16 Jun 2022 09:34:33 +0200 Subject: regmap: Re-introduce bulk read support check in regmap_bulk_read() Support for drivers to define bulk read/write callbacks in regmap_config was introduced by the commit d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config"), but this commit wrongly dropped a check in regmap_bulk_read() to determine whether bulk reads can be done or not. Before that commit, it was checked if map->bus was set. Now has to check if a map->read callback has been set. Fixes: d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config") Signed-off-by: Javier Martinez Canillas Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20220616073435.1988219-2-javierm@redhat.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2221d9863831..e5bb70374ffc 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -3017,7 +3017,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, if (val_count == 0) return -EINVAL; - if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { + if (map->read && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { ret = regmap_raw_read(map, reg, val, val_bytes * val_count); if (ret != 0) return ret; -- cgit v1.2.3 From c42e99a3f93b4ca15720fdfd7aa8f6141dcc2a58 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 16 Jun 2022 09:34:34 +0200 Subject: regmap: Make regmap_noinc_read() return -ENOTSUPP if map->read isn't set Before adding support to define bulk read/write callbacks in regmap_config by the commit d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config"), the regmap_noinc_read() function returned an errno early a map->bus->read callback wasn't set. But that commit dropped the check and now a call to _regmap_raw_read() is attempted even when bulk read operations are not supported. That function checks for map->read anyways but there's no point to continue if the read can't succeed. Also is a fragile assumption to make so is better to make it fail earlier. Fixes: d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config") Signed-off-by: Javier Martinez Canillas Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20220616073435.1988219-3-javierm@redhat.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e5bb70374ffc..f37f80a52115 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2904,6 +2904,9 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, size_t read_len; int ret; + if (!map->read) + return -ENOTSUPP; + if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) -- cgit v1.2.3 From 2a166929bc0a3ae754365dabc455039fd1be82ca Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 16 Jun 2022 09:34:35 +0200 Subject: regmap: Wire up regmap_config provided bulk write in missed functions There are some functions that were missed by commit d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config") when support to define bulk read/write callbacks in regmap_config was introduced. The regmap_bulk_write() and regmap_noinc_write() functions weren't changed to use the added map->write instead of the map->bus->write handler. Also, the regmap_can_raw_write() was not modified to take map->write into account. So will only return true if a bus with a .write callback is set. Fixes: d77e74561368 ("regmap: Add bulk read/write callbacks into regmap_config") Signed-off-by: Javier Martinez Canillas Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20220616073435.1988219-4-javierm@redhat.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f37f80a52115..c3517ccc3159 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1880,8 +1880,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, */ bool regmap_can_raw_write(struct regmap *map) { - return map->bus && map->bus->write && map->format.format_val && - map->format.format_reg; + return map->write && map->format.format_val && map->format.format_reg; } EXPORT_SYMBOL_GPL(regmap_can_raw_write); @@ -2155,10 +2154,9 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg, size_t write_len; int ret; - if (!map->bus) - return -EINVAL; - if (!map->bus->write) + if (!map->write) return -ENOTSUPP; + if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2278,7 +2276,7 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, * Some devices don't support bulk write, for them we have a series of * single write operations. */ - if (!map->bus || !map->format.parse_inplace) { + if (!map->write || !map->format.parse_inplace) { map->lock(map->lock_arg); for (i = 0; i < val_count; i++) { unsigned int ival; -- cgit v1.2.3 From c7b28f52f406bc89d15ca0ccbc47994f979f2fcd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 13 Jun 2022 12:22:41 +0200 Subject: drm/i915/display: Re-add check for low voltage sku for max dp source rate This reverts commit 73867c8709b5 ("drm/i915/display: Remove check for low voltage sku for max dp source rate"), which, on an i7-11850H iGPU with a Thinkpad X1 Extreme Gen 4, attached to a LG LP160UQ1-SPB1 embedded panel, causes wild flickering glitching technicolor pyrotechnics on resumption from suspend. The display shows strobing colors in an utter disaster explosion of pantone, as though bombs were dropped on the leprechauns at the base of the rainbow. Rebooting the machine fixes the issue, presumably because the display is initialized by firmware rather than by i915. Otherwise, the GPU appears to work fine. Bisection traced it back to this commit, which makes sense given the issues. Note: This re-opens, and puts back to the drawing board, https://gitlab.freedesktop.org/drm/intel/-/issues/5272 which was fixed by the regressing commit. Fixes: 73867c8709b5 ("drm/i915/display: Remove check for low voltage sku for max dp source rate") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6205 Cc: Ankit Nautiyal Cc: Imre Deak Cc: Jani Nikula Cc: Uma Shankar Cc: Animesh Manna Cc: Jani Saarinen Signed-off-by: Jason A. Donenfeld Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220613102241.9236-1-Jason@zx2c4.com (cherry picked from commit d5929835080a60f9119d024fa42f315913942f76) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e4a79c11fd25..ff67899522cf 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -388,13 +388,23 @@ static int dg2_max_source_rate(struct intel_dp *intel_dp) return intel_dp_is_edp(intel_dp) ? 810000 : 1350000; } +static bool is_low_voltage_sku(struct drm_i915_private *i915, enum phy phy) +{ + u32 voltage; + + voltage = intel_de_read(i915, ICL_PORT_COMP_DW3(phy)) & VOLTAGE_INFO_MASK; + + return voltage == VOLTAGE_INFO_0_85V; +} + static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); - if (intel_phy_is_combo(dev_priv, phy) && !intel_dp_is_edp(intel_dp)) + if (intel_phy_is_combo(dev_priv, phy) && + (is_low_voltage_sku(dev_priv, phy) || !intel_dp_is_edp(intel_dp))) return 540000; return 810000; @@ -402,7 +412,23 @@ static int icl_max_source_rate(struct intel_dp *intel_dp) static int ehl_max_source_rate(struct intel_dp *intel_dp) { - if (intel_dp_is_edp(intel_dp)) + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); + + if (intel_dp_is_edp(intel_dp) || is_low_voltage_sku(dev_priv, phy)) + return 540000; + + return 810000; +} + +static int dg1_max_source_rate(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(i915, dig_port->base.port); + + if (intel_phy_is_combo(i915, phy) && is_low_voltage_sku(i915, phy)) return 540000; return 810000; @@ -445,7 +471,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) max_rate = dg2_max_source_rate(intel_dp); else if (IS_ALDERLAKE_P(dev_priv) || IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv)) - max_rate = 810000; + max_rate = dg1_max_source_rate(intel_dp); else if (IS_JSL_EHL(dev_priv)) max_rate = ehl_max_source_rate(intel_dp); else -- cgit v1.2.3 From a09d2d00af53b43c6f11e6ab3cb58443c2cac8a7 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 20 Jun 2022 07:17:46 -0700 Subject: video: fbdev: pxa3xx-gcu: Fix integer overflow in pxa3xx_gcu_write In pxa3xx_gcu_write, a count parameter of type size_t is passed to words of type int. Then, copy_from_user() may cause a heap overflow because it is used as the third argument of copy_from_user(). Signed-off-by: Hyunwoo Kim Signed-off-by: Helge Deller --- drivers/video/fbdev/pxa3xx-gcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 043cc8f9ef1c..c3cd1e1cc01b 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -381,7 +381,7 @@ pxa3xx_gcu_write(struct file *file, const char *buff, struct pxa3xx_gcu_batch *buffer; struct pxa3xx_gcu_priv *priv = to_pxa3xx_gcu_priv(file); - int words = count / 4; + size_t words = count / 4; /* Does not need to be atomic. There's a lock in user space, * but anyhow, this is just for statistics. */ -- cgit v1.2.3 From b5c525abe717f2f41684b8581c13347d50d5285a Mon Sep 17 00:00:00 2001 From: Yihao Han Date: Wed, 8 Jun 2022 04:43:25 -0700 Subject: video: fbdev: au1100fb: Drop unnecessary NULL ptr check clk_disable() already checks the clk ptr using IS_ERR_OR_NULL(clk) and clk_enable() checks the clk ptr using !clk, so there is no need to check clk ptr again before calling them. Signed-off-by: Yihao Han Signed-off-by: Helge Deller --- drivers/video/fbdev/au1100fb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 52f731a61482..519313b8bb00 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -560,8 +560,7 @@ int au1100fb_drv_suspend(struct platform_device *dev, pm_message_t state) /* Blank the LCD */ au1100fb_fb_blank(VESA_POWERDOWN, &fbdev->info); - if (fbdev->lcdclk) - clk_disable(fbdev->lcdclk); + clk_disable(fbdev->lcdclk); memcpy(&fbregs, fbdev->regs, sizeof(struct au1100fb_regs)); @@ -577,8 +576,7 @@ int au1100fb_drv_resume(struct platform_device *dev) memcpy(fbdev->regs, &fbregs, sizeof(struct au1100fb_regs)); - if (fbdev->lcdclk) - clk_enable(fbdev->lcdclk); + clk_enable(fbdev->lcdclk); /* Unblank the LCD */ au1100fb_fb_blank(VESA_NO_BLANKING, &fbdev->info); -- cgit v1.2.3 From 5491424d17bdeb7b7852a59367858251783f8398 Mon Sep 17 00:00:00 2001 From: Yihao Han Date: Thu, 2 Jun 2022 02:42:18 -0700 Subject: video: fbdev: simplefb: Check before clk_put() not needed clk_put() already checks the clk ptr using !clk and IS_ERR() so there is no need to check it again before calling it. Signed-off-by: Yihao Han Reviewed-by: Hans de Goede Signed-off-by: Helge Deller --- drivers/video/fbdev/simplefb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index 2c198561c338..f96ce8801be4 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -237,8 +237,7 @@ static int simplefb_clocks_get(struct simplefb_par *par, if (IS_ERR(clock)) { if (PTR_ERR(clock) == -EPROBE_DEFER) { while (--i >= 0) { - if (par->clks[i]) - clk_put(par->clks[i]); + clk_put(par->clks[i]); } kfree(par->clks); return -EPROBE_DEFER; -- cgit v1.2.3 From 5ccc944dce3df5fd2fd683a7df4fd49d1068eba2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 10 Jun 2022 14:44:41 -0400 Subject: filemap: Correct the conditions for marking a folio as accessed We had an off-by-one error which meant that we never marked the first page in a read as accessed. This was visible as a slowdown when re-reading a file as pages were being evicted from cache too soon. In reviewing this code, we noticed a second bug where a multi-page folio would be marked as accessed multiple times when doing reads that were less than the size of the folio. Abstract the comparison of whether two file positions are in the same folio into a new function, fixing both of these bugs. Reported-by: Yu Kuai Reviewed-by: Kent Overstreet Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index ac3775c1ce4c..577068868449 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2629,6 +2629,13 @@ err: return err; } +static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) +{ + unsigned int shift = folio_shift(folio); + + return (pos1 >> shift == pos2 >> shift); +} + /** * filemap_read - Read data from the page cache. * @iocb: The iocb to read. @@ -2700,11 +2707,11 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, writably_mapped = mapping_writably_mapped(mapping); /* - * When a sequential read accesses a page several times, only + * When a read accesses the same folio several times, only * mark it as accessed the first time. */ - if (iocb->ki_pos >> PAGE_SHIFT != - ra->prev_pos >> PAGE_SHIFT) + if (!pos_same_folio(iocb->ki_pos, ra->prev_pos - 1, + fbatch.folios[0])) folio_mark_accessed(fbatch.folios[0]); for (i = 0; i < folio_batch_count(&fbatch); i++) { -- cgit v1.2.3 From cb995f4eeba9d268fd4b56c2423ad6c1d1ea1b82 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 20:00:17 -0400 Subject: filemap: Handle sibling entries in filemap_get_read_batch() If a read races with an invalidation followed by another read, it is possible for a folio to be replaced with a higher-order folio. If that happens, we'll see a sibling entry for the new folio in the next iteration of the loop. This manifests as a NULL pointer dereference while holding the RCU read lock. Handle this by simply returning. The next call will find the new folio and handle it correctly. The other ways of handling this rare race are more complex and it's just not worth it. Reported-by: Dave Chinner Reported-by: Brian Foster Debugged-by: Brian Foster Tested-by: Brian Foster Reviewed-by: Brian Foster Fixes: cbd59c48ae2b ("mm/filemap: use head pages in generic_file_buffered_read") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index 577068868449..ffdfbc8b0e3c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2385,6 +2385,8 @@ static void filemap_get_read_batch(struct address_space *mapping, continue; if (xas.xa_index > max || xa_is_value(folio)) break; + if (xa_is_sibling(folio)) + break; if (!folio_try_get_rcu(folio)) goto retry; -- cgit v1.2.3 From 73130a7b1ac92c9f30e0a255951129f4851c5794 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 18 Jun 2022 17:24:23 -0500 Subject: smb3: fix empty netname context on secondary channels Some servers do not allow null netname contexts, which would cause multichannel to revert to single channel when mounting to some servers (e.g. Azure xSMB). Fixes: 4c14d7043fede ("cifs: populate empty hostnames for extra channels") Reviewed-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b515140bad8d..5e8c4737b183 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -570,16 +570,18 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += ctxt_len; pneg_ctxt += ctxt_len; - ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, - server->hostname); - *total_len += ctxt_len; - pneg_ctxt += ctxt_len; - build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); pneg_ctxt += sizeof(struct smb2_posix_neg_context); - neg_context_count = 4; + if (server->hostname && (server->hostname[0] != 0)) { + ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, + server->hostname); + *total_len += ctxt_len; + pneg_ctxt += ctxt_len; + neg_context_count = 4; + } else /* second channels do not have a hostname */ + neg_context_count = 3; if (server->compress_algorithm) { build_compression_ctxt((struct smb2_compression_capabilities_context *) -- cgit v1.2.3 From dbab764ed5e987306480f827775876b99b81429e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 13 Jun 2022 14:46:47 +0200 Subject: MAINTAINERS: add include/dt-bindings/usb to USB SUBSYSTEM Maintainers of the directory Documentation/devicetree/bindings/usb are also the maintainers of the corresponding directory include/dt-bindings/usb. Add the file entry for include/dt-bindings/usb to the appropriate section in MAINTAINERS. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20220613124647.32019-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..f6c2182e7c38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20714,6 +20714,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git F: Documentation/devicetree/bindings/usb/ F: Documentation/usb/ F: drivers/usb/ +F: include/dt-bindings/usb/ F: include/linux/usb.h F: include/linux/usb/ -- cgit v1.2.3 From f2d8c2606825317b77db1f9ba0fc26ef26160b30 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 13 Jun 2022 10:17:03 -0400 Subject: usb: gadget: Fix non-unique driver names in raw-gadget driver In a report for a separate bug (which has already been fixed by commit 5f0b5f4d50fa "usb: gadget: fix race when gadget driver register via ioctl") in the raw-gadget driver, the syzbot console log included error messages caused by attempted registration of a new driver with the same name as an existing driver: > kobject_add_internal failed for raw-gadget with -EEXIST, don't try to register things with the same name in the same directory. > UDC core: USB Raw Gadget: driver registration failed: -17 > misc raw-gadget: fail, usb_gadget_register_driver returned -17 These errors arise because raw_gadget.c registers a separate UDC driver for each of the UDC instances it creates, but these drivers all have the same name: "raw-gadget". Until recently this wasn't a problem, but when the "gadget" bus was added and UDC drivers were registered on this bus, it became possible for name conflicts to cause the registrations to fail. The reason is simply that the bus code in the driver core uses the driver name as a sysfs directory name (e.g., /sys/bus/gadget/drivers/raw-gadget/), and you can't create two directories with the same pathname. To fix this problem, the driver names used by raw-gadget are made distinct by appending a unique ID number: "raw-gadget.N", with a different value of N for each driver instance. And to avoid the proliferation of error handling code in the raw_ioctl_init() routine, the error return paths are refactored into the common pattern (goto statements leading to cleanup code at the end of the routine). Link: https://lore.kernel.org/all/0000000000008c664105dffae2eb@google.com/ Fixes: fc274c1e9973 "USB: gadget: Add a new bus for gadgets" CC: Andrey Konovalov CC: Reported-and-tested-by: syzbot+02b16343704b3af1667e@syzkaller.appspotmail.com Reviewed-by: Andrey Konovalov Acked-by: Hillf Danton Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YqdG32w+3h8c1s7z@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 62 +++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 241740024c50..5c8481cef35f 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,9 @@ MODULE_LICENSE("GPL"); /*----------------------------------------------------------------------*/ +static DEFINE_IDA(driver_id_numbers); +#define DRIVER_DRIVER_NAME_LENGTH_MAX 32 + #define RAW_EVENT_QUEUE_SIZE 16 struct raw_event_queue { @@ -161,6 +165,9 @@ struct raw_dev { /* Reference to misc device: */ struct device *dev; + /* Make driver names unique */ + int driver_id_number; + /* Protected by lock: */ enum dev_state state; bool gadget_registered; @@ -189,6 +196,7 @@ static struct raw_dev *dev_new(void) spin_lock_init(&dev->lock); init_completion(&dev->ep0_done); raw_event_queue_init(&dev->queue); + dev->driver_id_number = -1; return dev; } @@ -199,6 +207,9 @@ static void dev_free(struct kref *kref) kfree(dev->udc_name); kfree(dev->driver.udc_name); + kfree(dev->driver.driver.name); + if (dev->driver_id_number >= 0) + ida_free(&driver_id_numbers, dev->driver_id_number); if (dev->req) { if (dev->ep0_urb_queued) usb_ep_dequeue(dev->gadget->ep0, dev->req); @@ -422,6 +433,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; + char *driver_driver_name; unsigned long flags; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) @@ -440,36 +452,44 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } + ret = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (ret < 0) + return ret; + dev->driver_id_number = ret; + + driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); + if (!driver_driver_name) { + ret = -ENOMEM; + goto out_free_driver_id_number; + } + snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, + DRIVER_NAME ".%d", dev->driver_id_number); + udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); - if (!udc_driver_name) - return -ENOMEM; + if (!udc_driver_name) { + ret = -ENOMEM; + goto out_free_driver_driver_name; + } ret = strscpy(udc_driver_name, &arg.driver_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - return ret; - } + if (ret < 0) + goto out_free_udc_driver_name; ret = 0; udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_device_name) { - kfree(udc_driver_name); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_udc_driver_name; } ret = strscpy(udc_device_name, &arg.device_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - kfree(udc_device_name); - return ret; - } + if (ret < 0) + goto out_free_udc_device_name; ret = 0; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_OPENED) { dev_dbg(dev->dev, "fail, device is not opened\n"); - kfree(udc_driver_name); - kfree(udc_device_name); ret = -EINVAL; goto out_unlock; } @@ -484,14 +504,24 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.suspend = gadget_suspend; dev->driver.resume = gadget_resume; dev->driver.reset = gadget_reset; - dev->driver.driver.name = DRIVER_NAME; + dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; dev->state = STATE_DEV_INITIALIZED; + spin_unlock_irqrestore(&dev->lock, flags); + return ret; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); +out_free_udc_device_name: + kfree(udc_device_name); +out_free_udc_driver_name: + kfree(udc_driver_name); +out_free_driver_driver_name: + kfree(driver_driver_name); +out_free_driver_id_number: + ida_free(&driver_id_numbers, dev->driver_id_number); return ret; } -- cgit v1.2.3 From d4597898ba7b9d467b94a9aafd65ec408a75041f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Jun 2022 10:41:17 +0100 Subject: btrfs: fix race between reflinking and ordered extent completion While doing a reflink operation, if an ordered extent for a file range that does not overlap with the source and destination ranges of the reflink operation happens, we can end up having a failure in the reflink operation and return -EINVAL to user space. The following sequence of steps explains how this can happen: 1) We have the page at file offset 315392 dirty (under delalloc); 2) A reflink operation for this file starts, using the same file as both source and destination, the source range is [372736, 409600) (length of 36864 bytes) and the destination range is [208896, 245760); 3) At btrfs_remap_file_range_prep(), we flush all delalloc in the source and destination ranges, and wait for any ordered extents in those range to complete; 4) Still at btrfs_remap_file_range_prep(), we then flush all delalloc in the inode, but we neither wait for it to complete nor any ordered extents to complete. This results in starting delalloc for the page at file offset 315392 and creating an ordered extent for that single page range; 5) We then move to btrfs_clone() and enter the loop to find file extent items to copy from the source range to destination range; 6) In the first iteration we end up at last file extent item stored in leaf A: (...) item 131 key (143616 108 315392) itemoff 5101 itemsize 53 extent data disk bytenr 1903988736 nr 73728 extent data offset 12288 nr 61440 ram 73728 This represents the file range [315392, 376832), which overlaps with the source range to clone. @datal is set to 61440, key.offset is 315392 and @next_key_min_offset is therefore set to 376832 (315392 + 61440). @off (372736) is > key.offset (315392), so @new_key.offset is set to the value of @destoff (208896). @new_key.offset == @last_dest_end (208896) so @drop_start is set to 208896 (@new_key.offset). @datal is adjusted to 4096, as @off is > @key.offset. So in this iteration we call btrfs_replace_file_extents() for the range [208896, 212991] (a single page, which is [@drop_start, @new_key.offset + @datal - 1]). @last_dest_end is set to 212992 (@new_key.offset + @datal = 208896 + 4096 = 212992). Before the next iteration of the loop, @key.offset is set to the value 376832, which is @next_key_min_offset; 7) On the second iteration btrfs_search_slot() leaves us again at leaf A, but this time pointing beyond the last slot of leaf A, as that's where a key with offset 376832 should be at if it existed. So end up calling btrfs_next_leaf(); 8) btrfs_next_leaf() releases the path, but before it searches again the tree for the next key/leaf, the ordered extent for the single page range at file offset 315392 completes. That results in trimming the file extent item we processed before, adjusting its key offset from 315392 to 319488, reducing its length from 61440 to 57344 and inserting a new file extent item for that single page range, with a key offset of 315392 and a length of 4096. Leaf A now looks like: (...) item 132 key (143616 108 315392) itemoff 4995 itemsize 53 extent data disk bytenr 1801666560 nr 4096 extent data offset 0 nr 4096 ram 4096 item 133 key (143616 108 319488) itemoff 4942 itemsize 53 extent data disk bytenr 1903988736 nr 73728 extent data offset 16384 nr 57344 ram 73728 9) When btrfs_next_leaf() returns, it gives us a path pointing to leaf A at slot 133, since it's the first key that follows what was the last key we saw (143616 108 315392). In fact it's the same item we processed before, but its key offset was changed, so it counts as a new key; 10) So now we have: @key.offset == 319488 @datal == 57344 @off (372736) is > key.offset (319488), so @new_key.offset is set to 208896 (@destoff value). @new_key.offset (208896) != @last_dest_end (212992), so @drop_start is set to 212992 (@last_dest_end value). @datal is adjusted to 4096 because @off > @key.offset. So in this iteration we call btrfs_replace_file_extents() for the invalid range of [212992, 212991] (which is [@drop_start, @new_key.offset + @datal - 1]). This range is empty, the end offset is smaller than the start offset so btrfs_replace_file_extents() returns -EINVAL, which we end up returning to user space and fail the reflink operation. This all happens because the range of this file extent item was already processed in the previous iteration. This scenario can be triggered very sporadically by fsx from fstests, for example with test case generic/522. So fix this by having btrfs_clone() skip file extent items that cover a file range that we have already processed. CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/reflink.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index c39f8b3a5a4a..912f4aa21a24 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -344,6 +344,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, int ret; const u64 len = olen_aligned; u64 last_dest_end = destoff; + u64 prev_extent_end = off; ret = -ENOMEM; buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); @@ -363,7 +364,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, key.offset = off; while (1) { - u64 next_key_min_offset = key.offset + 1; struct btrfs_file_extent_item *extent; u64 extent_gen; int type; @@ -431,14 +431,21 @@ process_slot: * The first search might have left us at an extent item that * ends before our target range's start, can happen if we have * holes and NO_HOLES feature enabled. + * + * Subsequent searches may leave us on a file range we have + * processed before - this happens due to a race with ordered + * extent completion for a file range that is outside our source + * range, but that range was part of a file extent item that + * also covered a leading part of our source range. */ - if (key.offset + datal <= off) { + if (key.offset + datal <= prev_extent_end) { path->slots[0]++; goto process_slot; } else if (key.offset >= off + len) { break; } - next_key_min_offset = key.offset + datal; + + prev_extent_end = key.offset + datal; size = btrfs_item_size(leaf, slot); read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); @@ -550,7 +557,7 @@ process_slot: break; btrfs_release_path(path); - key.offset = next_key_min_offset; + key.offset = prev_extent_end; if (fatal_signal_pending(current)) { ret = -EINTR; -- cgit v1.2.3 From 983d8209c6803345c9958f4cc358d1155f93a099 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Jun 2022 10:41:18 +0100 Subject: btrfs: add missing inode updates on each iteration when replacing extents When replacing file extents, called during fallocate, hole punching, clone and deduplication, we may not be able to replace/drop all the target file extent items with a single transaction handle. We may get -ENOSPC while doing it, in which case we release the transaction handle, balance the dirty pages of the btree inode, flush delayed items and get a new transaction handle to operate on what's left of the target range. By dropping and replacing file extent items we have effectively modified the inode, so we should bump its iversion and update its mtime/ctime before we update the inode item. This is because if the transaction we used for partially modifying the inode gets committed by someone after we release it and before we finish the rest of the range, a power failure happens, then after mounting the filesystem our inode has an outdated iversion and mtime/ctime, corresponding to the values it had before we changed it. So add the missing iversion and mtime/ctime updates. Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/file.c | 19 +++++++++++++++++++ fs/btrfs/inode.c | 1 + fs/btrfs/reflink.c | 1 + 4 files changed, 23 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0e49b1a0c071..415bf1823fb3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1330,6 +1330,8 @@ struct btrfs_replace_extent_info { * existing extent into a file range. */ bool is_new_extent; + /* Indicate if we should update the inode's mtime and ctime. */ + bool update_times; /* Meaningful only if is_new_extent is true. */ int qgroup_reserved; /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 46c2baa8fdf5..8e7fb3e6f79c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2802,6 +2802,25 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, extent_info->file_offset += replace_len; } + /* + * We are releasing our handle on the transaction, balance the + * dirty pages of the btree inode and flush delayed items, and + * then get a new transaction handle, which may now point to a + * new transaction in case someone else may have committed the + * transaction we used to replace/drop file extent items. So + * bump the inode's iversion and update mtime and ctime except + * if we are called from a dedupe context. This is because a + * power failure/crash may happen after the transaction is + * committed and before we finish replacing/dropping all the + * file extent items we need. + */ + inode_inc_iversion(&inode->vfs_inode); + + if (!extent_info || extent_info->update_times) { + inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode); + inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime; + } + ret = btrfs_update_inode(trans, root, inode); if (ret) break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da13bd0d10f1..a642d34c1363 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9897,6 +9897,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( extent_info.file_offset = file_offset; extent_info.extent_buf = (char *)&stack_fi; extent_info.is_new_extent = true; + extent_info.update_times = true; extent_info.qgroup_reserved = qgroup_released; extent_info.insertions = 0; diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 912f4aa21a24..a3549d587464 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -496,6 +496,7 @@ process_slot: clone_info.file_offset = new_key.offset; clone_info.extent_buf = buf; clone_info.is_new_extent = false; + clone_info.update_times = !no_time_update; ret = btrfs_replace_file_extents(BTRFS_I(inode), path, drop_start, new_key.offset + datal - 1, &clone_info, &trans); -- cgit v1.2.3 From 650c9caba32a0167a018cca0fab32a2965d23513 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Jun 2022 10:41:19 +0100 Subject: btrfs: do not BUG_ON() on failure to migrate space when replacing extents At btrfs_replace_file_extents(), if we fail to migrate reserved metadata space from the transaction block reserve into the local block reserve, we trigger a BUG_ON(). This is because it should not be possible to have a failure here, as we reserved more space when we started the transaction than the space we want to migrate. However having a BUG_ON() is way too drastic, we can perfectly handle the failure and return the error to the caller. So just do that instead, and add a WARN_ON() to make it easier to notice the failure if it ever happens (which is particularly useful for fstests, and the warning will trigger a failure of a test case). Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8e7fb3e6f79c..dd30639ecac2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2718,7 +2718,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, false); - BUG_ON(ret); + if (WARN_ON(ret)) + goto out_trans; trans->block_rsv = rsv; cur_offset = start; @@ -2837,7 +2838,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, false); - BUG_ON(ret); /* shouldn't happen */ + if (WARN_ON(ret)) + break; trans->block_rsv = rsv; cur_offset = drop_args.drop_end; -- cgit v1.2.3 From 343d8a30851c48a4ef0f5ef61d5e9fbd847a6883 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 7 Jun 2022 16:08:29 +0900 Subject: btrfs: zoned: prevent allocation from previous data relocation BG After commit 5f0addf7b890 ("btrfs: zoned: use dedicated lock for data relocation"), we observe IO errors on e.g, btrfs/232 like below. [09.0][T4038707] WARNING: CPU: 3 PID: 4038707 at fs/btrfs/extent-tree.c:2381 btrfs_cross_ref_exist+0xfc/0x120 [btrfs] [09.9][T4038707] Call Trace: [09.5][T4038707] [09.3][T4038707] run_delalloc_nocow+0x7f1/0x11a0 [btrfs] [09.6][T4038707] ? test_range_bit+0x174/0x320 [btrfs] [09.2][T4038707] ? fallback_to_cow+0x980/0x980 [btrfs] [09.3][T4038707] ? find_lock_delalloc_range+0x33e/0x3e0 [btrfs] [09.5][T4038707] btrfs_run_delalloc_range+0x445/0x1320 [btrfs] [09.2][T4038707] ? test_range_bit+0x320/0x320 [btrfs] [09.4][T4038707] ? lock_downgrade+0x6a0/0x6a0 [09.2][T4038707] ? orc_find.part.0+0x1ed/0x300 [09.5][T4038707] ? __module_address.part.0+0x25/0x300 [09.0][T4038707] writepage_delalloc+0x159/0x310 [btrfs] [09.4][ C3] sd 10:0:1:0: [sde] tag#2620 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s [09.5][ C3] sd 10:0:1:0: [sde] tag#2620 Sense Key : Illegal Request [current] [09.9][ C3] sd 10:0:1:0: [sde] tag#2620 Add. Sense: Unaligned write command [09.5][ C3] sd 10:0:1:0: [sde] tag#2620 CDB: Write(16) 8a 00 00 00 00 00 02 f3 63 87 00 00 00 2c 00 00 [09.4][ C3] critical target error, dev sde, sector 396041272 op 0x1:(WRITE) flags 0x800 phys_seg 3 prio class 0 [09.9][ C3] BTRFS error (device dm-1): bdev /dev/mapper/dml_102_2 errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 The IO errors occur when we allocate a regular extent in previous data relocation block group. On zoned btrfs, we use a dedicated block group to relocate a data extent. Thus, we allocate relocating data extents (pre-alloc) only from the dedicated block group and vice versa. Once the free space in the dedicated block group gets tight, a relocating extent may not fit into the block group. In that case, we need to switch the dedicated block group to the next one. Then, the previous one is now freed up for allocating a regular extent. The BG is already not enough to allocate the relocating extent, but there is still room to allocate a smaller extent. Now the problem happens. By allocating a regular extent while nocow IOs for the relocation is still on-going, we will issue WRITE IOs (for relocation) and ZONE APPEND IOs (for the regular writes) at the same time. That mixed IOs confuses the write pointer and arises the unaligned write errors. This commit introduces a new bit 'zoned_data_reloc_ongoing' to the btrfs_block_group. We set this bit before releasing the dedicated block group, and no extent are allocated from a block group having this bit set. This bit is similar to setting block_group->ro, but is different from it by allowing nocow writes to start. Once all the nocow IO for relocation is done (hooked from btrfs_finish_ordered_io), we reset the bit to release the block group for further allocation. Fixes: c2707a255623 ("btrfs: zoned: add a dedicated data relocation block group") CC: stable@vger.kernel.org # 5.16+ Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.h | 1 + fs/btrfs/extent-tree.c | 20 ++++++++++++++++++-- fs/btrfs/inode.c | 2 ++ fs/btrfs/zoned.c | 27 +++++++++++++++++++++++++++ fs/btrfs/zoned.h | 5 +++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 3ac668ace50a..35e0e860cc0b 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -104,6 +104,7 @@ struct btrfs_block_group { unsigned int relocating_repair:1; unsigned int chunk_item_inserted:1; unsigned int zone_is_active:1; + unsigned int zoned_data_reloc_ongoing:1; int disk_cache_state; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fb367689d9d2..4515497d8a29 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3832,7 +3832,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, block_group->start == fs_info->data_reloc_bg || fs_info->data_reloc_bg == 0); - if (block_group->ro) { + if (block_group->ro || block_group->zoned_data_reloc_ongoing) { ret = 1; goto out; } @@ -3894,8 +3894,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, out: if (ret && ffe_ctl->for_treelog) fs_info->treelog_bg = 0; - if (ret && ffe_ctl->for_data_reloc) + if (ret && ffe_ctl->for_data_reloc && + fs_info->data_reloc_bg == block_group->start) { + /* + * Do not allow further allocations from this block group. + * Compared to increasing the ->ro, setting the + * ->zoned_data_reloc_ongoing flag still allows nocow + * writers to come in. See btrfs_inc_nocow_writers(). + * + * We need to disable an allocation to avoid an allocation of + * regular (non-relocation data) extent. With mix of relocation + * extents and regular extents, we can dispatch WRITE commands + * (for relocation extents) and ZONE APPEND commands (for + * regular extents) at the same time to the same zone, which + * easily break the write pointer. + */ + block_group->zoned_data_reloc_ongoing = 1; fs_info->data_reloc_bg = 0; + } spin_unlock(&fs_info->relocation_bg_lock); spin_unlock(&fs_info->treelog_bg_lock); spin_unlock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a642d34c1363..ba527da61732 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3195,6 +3195,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->file_offset, ordered_extent->file_offset + logical_len); + btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes); } else { BUG_ON(root == fs_info->tree_root); ret = insert_ordered_extent_file_extent(trans, ordered_extent); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 057babaa3e05..8aec53528efa 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2140,3 +2140,30 @@ bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info) factor = div64_u64(used * 100, total); return factor >= fs_info->bg_reclaim_threshold; } + +void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, + u64 length) +{ + struct btrfs_block_group *block_group; + + if (!btrfs_is_zoned(fs_info)) + return; + + block_group = btrfs_lookup_block_group(fs_info, logical); + /* It should be called on a previous data relocation block group. */ + ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); + + spin_lock(&block_group->lock); + if (!block_group->zoned_data_reloc_ongoing) + goto out; + + /* All relocation extents are written. */ + if (block_group->start + block_group->alloc_offset == logical + length) { + /* Now, release this block group for further allocations. */ + block_group->zoned_data_reloc_ongoing = 0; + } + +out: + spin_unlock(&block_group->lock); + btrfs_put_block_group(block_group); +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index bb1a189e11f9..6b2eec99162b 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -77,6 +77,8 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info); bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info); +void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, + u64 length); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -243,6 +245,9 @@ static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info) { return false; } + +static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) { } #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) -- cgit v1.2.3 From 19ab78ca86981e0e1e73036fb73a508731a7c078 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 7 Jun 2022 16:08:30 +0900 Subject: btrfs: zoned: fix critical section of relocation inode writeback We use btrfs_zoned_data_reloc_{lock,unlock} to allow only one process to write out to the relocation inode. That critical section must include all the IO submission for the inode. However, flush_write_bio() in extent_writepages() is out of the critical section, causing an IO submission outside of the lock. This leads to an out of the order IO submission and fail the relocation process. Fix it by extending the critical section. Fixes: 35156d852762 ("btrfs: zoned: only allow one process to add pages to a relocation inode") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 588c7c606a2c..9c250b8cd548 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5240,13 +5240,14 @@ int extent_writepages(struct address_space *mapping, */ btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); ret = extent_write_cache_pages(mapping, wbc, &epd); - btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); ASSERT(ret <= 0); if (ret < 0) { + btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); end_write_bio(&epd, ret); return ret; } flush_write_bio(&epd); + btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); return ret; } -- cgit v1.2.3 From 97e86631bccddfbbe0c13f9a9605cdef11d31296 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Wed, 8 Jun 2022 22:39:36 -0400 Subject: btrfs: don't set lock_owner when locking extent buffer for reading In 196d59ab9ccc "btrfs: switch extent buffer tree lock to rw_semaphore" the functions for tree read locking were rewritten, and in the process the read lock functions started setting eb->lock_owner = current->pid. Previously lock_owner was only set in tree write lock functions. Read locks are shared, so they don't have exclusive ownership of the underlying object, so setting lock_owner to any single value for a read lock makes no sense. It's mostly harmless because write locks and read locks are mutually exclusive, and none of the existing code in btrfs (btrfs_init_new_buffer and print_eb_refs_lock) cares what nonsense is written in lock_owner when no writer is holding the lock. KCSAN does care, and will complain about the data race incessantly. Remove the assignments in the read lock functions because they're useless noise. Fixes: 196d59ab9ccc ("btrfs: switch extent buffer tree lock to rw_semaphore") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Zygo Blaxell Signed-off-by: David Sterba --- fs/btrfs/locking.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 313d9d685adb..33461b4f9c8b 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -45,7 +45,6 @@ void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting ne start_ns = ktime_get_ns(); down_read_nested(&eb->lock, nest); - eb->lock_owner = current->pid; trace_btrfs_tree_read_lock(eb, start_ns); } @@ -62,7 +61,6 @@ void btrfs_tree_read_lock(struct extent_buffer *eb) int btrfs_try_tree_read_lock(struct extent_buffer *eb) { if (down_read_trylock(&eb->lock)) { - eb->lock_owner = current->pid; trace_btrfs_try_tree_read_lock(eb); return 1; } @@ -90,7 +88,6 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) void btrfs_tree_read_unlock(struct extent_buffer *eb) { trace_btrfs_tree_read_unlock(eb); - eb->lock_owner = 0; up_read(&eb->lock); } -- cgit v1.2.3 From bf7ba8ee759b7b7a34787ddd8dc3f190a3d7fa24 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Jun 2022 15:09:49 -0400 Subject: btrfs: fix deadlock with fsync+fiemap+transaction commit We are hitting the following deadlock in production occasionally Task 1 Task 2 Task 3 Task 4 Task 5 fsync(A) start trans start commit falloc(A) lock 5m-10m start trans wait for commit fiemap(A) lock 0-10m wait for 5m-10m (have 0-5m locked) have btrfs_need_log_full_commit !full_sync wait_ordered_extents finish_ordered_io(A) lock 0-5m DEADLOCK We have an existing dependency of file extent lock -> transaction. However in fsync if we tried to do the fast logging, but then had to fall back to committing the transaction, we will be forced to call btrfs_wait_ordered_range() to make sure all of our extents are updated. This creates a dependency of transaction -> file extent lock, because btrfs_finish_ordered_io() will need to take the file extent lock in order to run the ordered extents. Fix this by stopping the transaction if we have to do the full commit and we attempted to do the fast logging. Then attach to the transaction and commit it if we need to. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/file.c | 67 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dd30639ecac2..af2f2b71d2df 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2322,25 +2322,62 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); - if (ret != BTRFS_NO_LOG_SYNC) { + if (ret == BTRFS_NO_LOG_SYNC) { + ret = btrfs_end_transaction(trans); + goto out; + } + + /* We successfully logged the inode, attempt to sync the log. */ + if (!ret) { + ret = btrfs_sync_log(trans, root, &ctx); if (!ret) { - ret = btrfs_sync_log(trans, root, &ctx); - if (!ret) { - ret = btrfs_end_transaction(trans); - goto out; - } - } - if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, len); - if (ret) { - btrfs_end_transaction(trans); - goto out; - } + ret = btrfs_end_transaction(trans); + goto out; } - ret = btrfs_commit_transaction(trans); - } else { + } + + /* + * At this point we need to commit the transaction because we had + * btrfs_need_log_full_commit() or some other error. + * + * If we didn't do a full sync we have to stop the trans handle, wait on + * the ordered extents, start it again and commit the transaction. If + * we attempt to wait on the ordered extents here we could deadlock with + * something like fallocate() that is holding the extent lock trying to + * start a transaction while some other thread is trying to commit the + * transaction while we (fsync) are currently holding the transaction + * open. + */ + if (!full_sync) { ret = btrfs_end_transaction(trans); + if (ret) + goto out; + ret = btrfs_wait_ordered_range(inode, start, len); + if (ret) + goto out; + + /* + * This is safe to use here because we're only interested in + * making sure the transaction that had the ordered extents is + * committed. We aren't waiting on anything past this point, + * we're purely getting the transaction and committing it. + */ + trans = btrfs_attach_transaction_barrier(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + + /* + * We committed the transaction and there's no currently + * running transaction, this means everything we care + * about made it to disk and we are done. + */ + if (ret == -ENOENT) + ret = 0; + goto out; + } } + + ret = btrfs_commit_transaction(trans); out: ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); -- cgit v1.2.3 From 037e127452b973f45b34c1e88a1af183e652e657 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Jun 2022 15:27:48 +0200 Subject: Documentation: update btrfs list of features and link to readthedocs.io The btrfs documentation in kernel is only meant as a starting point, so update the list of features and add link to btrfs.readthedocs.io page that is most up-to-date. The wiki is still used but information is migrated from there. Signed-off-by: David Sterba --- Documentation/filesystems/btrfs.rst | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/btrfs.rst b/Documentation/filesystems/btrfs.rst index d0904f602819..992eddb0e11b 100644 --- a/Documentation/filesystems/btrfs.rst +++ b/Documentation/filesystems/btrfs.rst @@ -19,13 +19,23 @@ The main Btrfs features include: * Subvolumes (separate internal filesystem roots) * Object level mirroring and striping * Checksums on data and metadata (multiple algorithms available) - * Compression + * Compression (multiple algorithms available) + * Reflink, deduplication + * Scrub (on-line checksum verification) + * Hierarchical quota groups (subvolume and snapshot support) * Integrated multiple device support, with several raid algorithms * Offline filesystem check - * Efficient incremental backup and FS mirroring + * Efficient incremental backup and FS mirroring (send/receive) + * Trim/discard * Online filesystem defragmentation + * Swapfile support + * Zoned mode + * Read/write metadata verification + * Online resize (shrink, grow) -For more information please refer to the wiki +For more information please refer to the documentation site or wiki + + https://btrfs.readthedocs.io https://btrfs.wiki.kernel.org -- cgit v1.2.3 From b60cac14bb3c88cff2a7088d9095b01a80938c41 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Jun 2022 07:47:13 -0600 Subject: io_uring: fix merge error in checking send/recv addr2 flags With the dropping of the IOPOLL checking in the per-opcode handlers, we inadvertently left two checks in the recv/recvmsg and send/sendmsg prep handlers for the same thing, and one of them includes addr2 which holds the flags for these opcodes. Fix it up and kill the redundant checks. Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 87c65a358678..05508fe92b9c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6077,8 +6077,6 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->file_index)) return -EINVAL; - if (unlikely(sqe->addr2 || sqe->file_index)) - return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -6315,8 +6313,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->file_index)) return -EINVAL; - if (unlikely(sqe->addr2 || sqe->file_index)) - return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); -- cgit v1.2.3 From aacf2f9f382c91df73f33317e28a4c34c8038986 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 21 Jun 2022 13:25:06 +0100 Subject: io_uring: fix req->apoll_events apoll_events should be set once in the beginning of poll arming just as poll->events and not change after. However, currently io_uring resets it on each __io_poll_execute() for no clear reason. There is also a place in __io_arm_poll_handler() where we add EPOLLONESHOT to downgrade a multishot, but forget to do the same thing with ->apoll_events, which is buggy. Fixes: 81459350d581e ("io_uring: cache req->apoll->events in req->cflags") Signed-off-by: Pavel Begunkov Reviewed-by: Hao Xu Link: https://lore.kernel.org/r/0aef40399ba75b1a4d2c2e85e6e8fd93c02fc6e4.1655814213.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 05508fe92b9c..dffa85d4dc7a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6950,7 +6950,8 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) io_req_complete_failed(req, ret); } -static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events) +static void __io_poll_execute(struct io_kiocb *req, int mask, + __poll_t __maybe_unused events) { req->cqe.res = mask; /* @@ -6959,7 +6960,6 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events) * CPU. We want to avoid pulling in req->apoll->events for that * case. */ - req->apoll_events = events; if (req->opcode == IORING_OP_POLL_ADD) req->io_task_work.func = io_poll_task_func; else @@ -7110,6 +7110,8 @@ static int __io_arm_poll_handler(struct io_kiocb *req, io_init_poll_iocb(poll, mask, io_poll_wake); poll->file = req->file; + req->apoll_events = poll->events; + ipt->pt._key = mask; ipt->req = req; ipt->error = 0; @@ -7140,8 +7142,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (mask) { /* can't multishot if failed, just queue the event we've got */ - if (unlikely(ipt->error || !ipt->nr_entries)) + if (unlikely(ipt->error || !ipt->nr_entries)) { poll->events |= EPOLLONESHOT; + req->apoll_events |= EPOLLONESHOT; + } __io_poll_execute(req, mask, poll->events); return 0; } @@ -7388,7 +7392,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return -EINVAL; io_req_set_refcount(req); - req->apoll_events = poll->events = io_poll_parse_events(sqe, flags); + poll->events = io_poll_parse_events(sqe, flags); return 0; } -- cgit v1.2.3 From 0f074c1c95ea496dc91279b6c4b9845a337517fa Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2022 15:54:02 +0200 Subject: dt-bindings: usb: ohci: Increase the number of PHYs "make dtbs_check": arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee080000: phys: [[17, 0], [31]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ohci.yaml arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee0c0000: phys: [[17, 1], [33], [21, 0]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ohci.yaml Some USB OHCI controllers (e.g. on the Renesas RZ/G1C SoC) have multiple PHYs. Increase the maximum number of PHYs to 3, which is sufficient for now. Fixes: 0499220d6dadafa5 ("dt-bindings: Add missing array size constraints") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/0112f9c8881513cb33bf7b66bc743dd08b35a2f5.1655301203.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/generic-ohci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/generic-ohci.yaml b/Documentation/devicetree/bindings/usb/generic-ohci.yaml index e2ac84665316..bb6bbd5f129d 100644 --- a/Documentation/devicetree/bindings/usb/generic-ohci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ohci.yaml @@ -103,7 +103,8 @@ properties: Overrides the detected port count phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb -- cgit v1.2.3 From 9faa1c8f92f33daad9db96944139de225cefa199 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2022 15:53:09 +0200 Subject: dt-bindings: usb: ehci: Increase the number of PHYs "make dtbs_check": arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee080100: phys: [[17, 0], [31]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ehci.yaml arch/arm/boot/dts/r8a77470-iwg23s-sbc.dtb: usb@ee0c0100: phys: [[17, 1], [33], [21, 0]] is too long From schema: Documentation/devicetree/bindings/usb/generic-ehci.yaml Some USB EHCI controllers (e.g. on the Renesas RZ/G1C SoC) have multiple PHYs. Increase the maximum number of PHYs to 3, which is sufficient for now. Fixes: 0499220d6dadafa5 ("dt-bindings: Add missing array size constraints") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/c5d19e2f9714f43effd90208798fc1936098078f.1655301043.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/generic-ehci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml index 0b4524b6409e..1e84e1b7ab27 100644 --- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml @@ -136,7 +136,8 @@ properties: Phandle of a companion. phys: - maxItems: 1 + minItems: 1 + maxItems: 3 phy-names: const: usb -- cgit v1.2.3 From 96163f835e65f8c9897487fac965819f0651d671 Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Fri, 17 Jun 2022 11:31:53 -0500 Subject: usb: gadget: uvc: fix list double add in uvcg_video_pump A panic can occur if the endpoint becomes disabled and the uvcg_video_pump adds the request back to the req_free list after it has already been queued to the endpoint. The endpoint complete will add the request back to the req_free list. Invalidate the local request handle once it's been queued. <6>[ 246.796704][T13726] configfs-gadget gadget: uvc: uvc_function_set_alt(1, 0) <3>[ 246.797078][ T26] list_add double add: new=ffffff878bee5c40, prev=ffffff878bee5c40, next=ffffff878b0f0a90. <6>[ 246.797213][ T26] ------------[ cut here ]------------ <2>[ 246.797224][ T26] kernel BUG at lib/list_debug.c:31! <6>[ 246.807073][ T26] Call trace: <6>[ 246.807180][ T26] uvcg_video_pump+0x364/0x38c <6>[ 246.807366][ T26] process_one_work+0x2a4/0x544 <6>[ 246.807394][ T26] worker_thread+0x350/0x784 <6>[ 246.807442][ T26] kthread+0x2ac/0x320 Fixes: f9897ec0f6d3 ("usb: gadget: uvc: only pump video data if necessary") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Signed-off-by: Dan Vacura Link: https://lore.kernel.org/r/20220617163154.16621-1-w36195@motorola.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_video.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index a9bb4553db84..d42bb3346745 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -424,6 +424,9 @@ static void uvcg_video_pump(struct work_struct *work) uvcg_queue_cancel(queue, 0); break; } + + /* Endpoint now owns the request */ + req = NULL; video->req_int_count++; } -- cgit v1.2.3 From 9ef165406308515dcf2e3f6e97b39a1c56d86db5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 20 Jun 2022 13:43:16 +0300 Subject: usb: typec: wcove: Drop wrong dependency to INTEL_SOC_PMIC Intel SoC PMIC is a generic name for all PMICs that are used on Intel platforms. In particular, INTEL_SOC_PMIC kernel configuration option refers to Crystal Cove PMIC, which has never been a part of any Intel Broxton hardware. Drop wrong dependency from Kconfig. Note, the correct dependency is satisfied via ACPI PMIC OpRegion driver, which the Type-C depends on. Fixes: d2061f9cc32d ("usb: typec: add driver for Intel Whiskey Cove PMIC USB Type-C PHY") Reported-by: Hans de Goede Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220620104316.57592-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig index 557f392fe24d..073fd2ea5e0b 100644 --- a/drivers/usb/typec/tcpm/Kconfig +++ b/drivers/usb/typec/tcpm/Kconfig @@ -56,7 +56,6 @@ config TYPEC_WCOVE tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver" depends on ACPI depends on MFD_INTEL_PMC_BXT - depends on INTEL_SOC_PMIC depends on BXT_WC_PMIC_OPREGION help This driver adds support for USB Type-C on Intel Broxton platforms -- cgit v1.2.3 From f9710c357e5bbf64d7ce45ba0bc75a52222491c1 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 1 Jun 2022 15:53:41 -0400 Subject: xen-blkfront: Handle NULL gendisk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a VBD is not fully created and then closed, the kernel can have a NULL pointer dereference: The reproducer is trivial: [user@dom0 ~]$ sudo xl block-attach work backend=sys-usb vdev=xvdi target=/dev/sdz [user@dom0 ~]$ xl block-list work Vdev BE handle state evt-ch ring-ref BE-path 51712 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51712 51728 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51728 51744 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51744 51760 0 241 4 -1 -1 /local/domain/0/backend/vbd/241/51760 51840 3 241 3 -1 -1 /local/domain/3/backend/vbd/241/51840 ^ note state, the /dev/sdz doesn't exist in the backend [user@dom0 ~]$ sudo xl block-detach work xvdi [user@dom0 ~]$ xl block-list work Vdev BE handle state evt-ch ring-ref BE-path work is an invalid domain identifier And its console has: BUG: kernel NULL pointer dereference, address: 0000000000000050 PGD 80000000edebb067 P4D 80000000edebb067 PUD edec2067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 52 Comm: xenwatch Not tainted 5.16.18-2.43.fc32.qubes.x86_64 #1 RIP: 0010:blk_mq_stop_hw_queues+0x5/0x40 Code: 00 48 83 e0 fd 83 c3 01 48 89 85 a8 00 00 00 41 39 5c 24 50 77 c0 5b 5d 41 5c 41 5d c3 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <8b> 47 50 85 c0 74 32 41 54 49 89 fc 55 53 31 db 49 8b 44 24 48 48 RSP: 0018:ffffc90000bcfe98 EFLAGS: 00010293 RAX: ffffffffc0008370 RBX: 0000000000000005 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 RBP: ffff88800775f000 R08: 0000000000000001 R09: ffff888006e620b8 R10: ffff888006e620b0 R11: f000000000000000 R12: ffff8880bff39000 R13: ffff8880bff39000 R14: 0000000000000000 R15: ffff88800604be00 FS: 0000000000000000(0000) GS:ffff8880f3300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000050 CR3: 00000000e932e002 CR4: 00000000003706e0 Call Trace: blkback_changed+0x95/0x137 [xen_blkfront] ? read_reply+0x160/0x160 xenwatch_thread+0xc0/0x1a0 ? do_wait_intr_irq+0xa0/0xa0 kthread+0x16b/0x190 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore ipt_REJECT nf_reject_ipv4 xt_state xt_conntrack nft_counter nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_compat nf_tables nfnetlink intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel xen_netfront pcspkr xen_scsiback target_core_mod xen_netback xen_privcmd xen_gntdev xen_gntalloc xen_blkback xen_evtchn ipmi_devintf ipmi_msghandler fuse bpf_preload ip_tables overlay xen_blkfront CR2: 0000000000000050 ---[ end trace 7bc9597fd06ae89d ]--- RIP: 0010:blk_mq_stop_hw_queues+0x5/0x40 Code: 00 48 83 e0 fd 83 c3 01 48 89 85 a8 00 00 00 41 39 5c 24 50 77 c0 5b 5d 41 5c 41 5d c3 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <8b> 47 50 85 c0 74 32 41 54 49 89 fc 55 53 31 db 49 8b 44 24 48 48 RSP: 0018:ffffc90000bcfe98 EFLAGS: 00010293 RAX: ffffffffc0008370 RBX: 0000000000000005 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 RBP: ffff88800775f000 R08: 0000000000000001 R09: ffff888006e620b8 R10: ffff888006e620b0 R11: f000000000000000 R12: ffff8880bff39000 R13: ffff8880bff39000 R14: 0000000000000000 R15: ffff88800604be00 FS: 0000000000000000(0000) GS:ffff8880f3300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000050 CR3: 00000000e932e002 CR4: 00000000003706e0 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled info->rq and info->gd are only set in blkfront_connect(), which is called for state 4 (XenbusStateConnected). Guard against using NULL variables in blkfront_closing() to avoid the issue. The rest of blkfront_closing looks okay. If info->nr_rings is 0, then for_each_rinfo won't do anything. blkfront_remove also needs to check for non-NULL pointers before cleaning up the gendisk and request queue. Fixes: 05d69d950d9d "xen-blkfront: sanitize the removal state machine" Reported-by: Marek Marczykowski-Górecki Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220601195341.28581-1-jandryuk@gmail.com Signed-off-by: Juergen Gross --- drivers/block/xen-blkfront.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a88ce4426400..33f04ef78984 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2114,9 +2114,11 @@ static void blkfront_closing(struct blkfront_info *info) return; /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - blk_mark_disk_dead(info->gd); - set_capacity(info->gd, 0); + if (info->rq && info->gd) { + blk_mq_stop_hw_queues(info->rq); + blk_mark_disk_dead(info->gd); + set_capacity(info->gd, 0); + } for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. */ @@ -2457,16 +2459,19 @@ static int blkfront_remove(struct xenbus_device *xbdev) dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - del_gendisk(info->gd); + if (info->gd) + del_gendisk(info->gd); mutex_lock(&blkfront_mutex); list_del(&info->info_list); mutex_unlock(&blkfront_mutex); blkif_free(info, 0); - xlbd_release_minors(info->gd->first_minor, info->gd->minors); - blk_cleanup_disk(info->gd); - blk_mq_free_tag_set(&info->tag_set); + if (info->gd) { + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + } kfree(info); return 0; -- cgit v1.2.3 From ecb6237fa397b7b810d798ad19322eca466dbab1 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 17 Jun 2022 11:30:37 +0100 Subject: x86/xen: Remove undefined behavior in setup_features() 1 << 31 is undefined. So switch to 1U << 31. Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Signed-off-by: Julien Grall Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220617103037.57828-1-julien@xen.org Signed-off-by: Juergen Gross --- drivers/xen/features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 7b591443833c..87f1828d40d5 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -42,7 +42,7 @@ void xen_setup_features(void) if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1< Date: Mon, 9 May 2022 16:51:43 +0300 Subject: drm/xen: Add missing VM_DONTEXPAND flag in mmap callback With Xen PV Display driver in use the "expected" VM_DONTEXPAND flag is not set (neither explicitly nor implicitly), so the driver hits the code path in drm_gem_mmap_obj() which triggers the WARNING. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Oleksandr Andrushchenko Link: https://lore.kernel.org/r/1652104303-5098-1-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 5a5bf4e5b717..e31554d7139f 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -71,7 +71,7 @@ static int xen_drm_front_gem_object_mmap(struct drm_gem_object *gem_obj, * the whole buffer. */ vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; vma->vm_pgoff = 0; /* -- cgit v1.2.3 From c81aba8fde2aee4f5778ebab3a1d51bd2ef48e4c Mon Sep 17 00:00:00 2001 From: huhai Date: Fri, 10 Jun 2022 19:14:20 +0800 Subject: MIPS: Remove repetitive increase irq_err_count commit 979934da9e7a ("[PATCH] mips: update IRQ handling for vr41xx") added a function irq_dispatch, and it'll increase irq_err_count when the get_irq callback returns a negative value, but increase irq_err_count in get_irq was not removed. And also, modpost complains once gpio-vr41xx drivers become modules. ERROR: modpost: "irq_err_count" [drivers/gpio/gpio-vr41xx.ko] undefined! So it would be a good idea to remove repetitive increase irq_err_count in get_irq callback. Fixes: 27fdd325dace ("MIPS: Update VR41xx GPIO driver to use gpiolib") Fixes: 979934da9e7a ("[PATCH] mips: update IRQ handling for vr41xx") Reported-by: k2ci Signed-off-by: huhai Signed-off-by: Genjian Zhang Signed-off-by: Thomas Bogendoerfer --- arch/mips/vr41xx/common/icu.c | 2 -- drivers/gpio/gpio-vr41xx.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057..9240bcdbe74e 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c index 98cd715ccc33..8d09b619c166 100644 --- a/drivers/gpio/gpio-vr41xx.c +++ b/drivers/gpio/gpio-vr41xx.c @@ -217,8 +217,6 @@ static int giu_get_irq(unsigned int irq) printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n", maskl, pendl, maskh, pendh); - atomic_inc(&irq_err_count); - return -EINVAL; } -- cgit v1.2.3 From 4becf6417bbdc293734a590fe4ed38437bbcea2c Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 15 Jun 2022 22:11:23 +0800 Subject: arch: mips: generic: Add missing of_node_put() in board-ranchu.c In ranchu_measure_hpt_freq(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_put_node() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/generic/board-ranchu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/generic/board-ranchu.c b/arch/mips/generic/board-ranchu.c index a89aaad59cb1..930c45041882 100644 --- a/arch/mips/generic/board-ranchu.c +++ b/arch/mips/generic/board-ranchu.c @@ -44,6 +44,7 @@ static __init unsigned int ranchu_measure_hpt_freq(void) __func__); rtc_base = of_iomap(np, 0); + of_node_put(np); if (!rtc_base) panic("%s(): Failed to ioremap Goldfish RTC base!", __func__); -- cgit v1.2.3 From 608d94cb84c42585058d692f2fe5d327f8868cdb Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 16 Jun 2022 22:27:56 +0800 Subject: mips: mti-malta: Fix refcount leak in malta-time.c In update_gic_frequency_dt(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/mti-malta/malta-time.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index bbf1e38e1431..2cb708cdf01a 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -214,6 +214,8 @@ static void update_gic_frequency_dt(void) if (of_update_property(node, &gic_frequency_prop) < 0) pr_err("error updating gic frequency property\n"); + + of_node_put(node); } #endif -- cgit v1.2.3 From 48ca54e39173d1ed4c4dc8cf045484014bb26eaf Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 11:39:29 +0800 Subject: mips: ralink: Fix refcount leak in of.c In plat_of_remap_node(), plat_of_remap_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/ralink/of.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 587c7b998769..ea8072acf8d9 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -40,6 +40,8 @@ __iomem void *plat_of_remap_node(const char *node) if (of_address_to_resource(np, 0, &res)) panic("Failed to get resource for %s", node); + of_node_put(np); + if (!request_mem_region(res.start, resource_size(&res), res.name)) -- cgit v1.2.3 From 72a2af539fff975caadd9a4db3f99963569bd9c9 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 20:19:58 +0800 Subject: mips: lantiq: falcon: Fix refcount leak bug in sysctrl In ltq_soc_init(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/falcon/sysctrl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 5204fc6d6d50..1187729d8cbb 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -208,6 +208,12 @@ void __init ltq_soc_init(void) of_address_to_resource(np_sysgpe, 0, &res_sys[2])) panic("Failed to get core resources"); + of_node_put(np_status); + of_node_put(np_ebu); + of_node_put(np_sys1); + of_node_put(np_syseth); + of_node_put(np_sysgpe); + if ((request_mem_region(res_status.start, resource_size(&res_status), res_status.name) < 0) || (request_mem_region(res_ebu.start, resource_size(&res_ebu), -- cgit v1.2.3 From 76695592711ef1e215cc24ed3e1cd857d7fc3098 Mon Sep 17 00:00:00 2001 From: Liang He Date: Fri, 17 Jun 2022 20:25:05 +0800 Subject: mips: lantiq: xway: Fix refcount leak bug in sysctrl In ltq_soc_init(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/xway/sysctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 084f6caba5f2..d444a1b98a72 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -441,6 +441,10 @@ void __init ltq_soc_init(void) of_address_to_resource(np_ebu, 0, &res_ebu)) panic("Failed to get core resources"); + of_node_put(np_pmu); + of_node_put(np_cgu); + of_node_put(np_ebu); + if (!request_mem_region(res_pmu.start, resource_size(&res_pmu), res_pmu.name) || !request_mem_region(res_cgu.start, resource_size(&res_cgu), -- cgit v1.2.3 From eb9e9bc4fa5fb489c92ec588b3fb35f042ba6d86 Mon Sep 17 00:00:00 2001 From: Liang He Date: Sun, 19 Jun 2022 12:54:27 +0800 Subject: mips/pic32/pic32mzda: Fix refcount leak bugs of_find_matching_node(), of_find_compatible_node() and of_find_node_by_path() will return node pointers with refcout incremented. We should call of_node_put() when they are not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/pic32/pic32mzda/init.c | 7 ++++++- arch/mips/pic32/pic32mzda/time.c | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 129915616763..d9c8c4e46aff 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -98,13 +98,18 @@ static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup) np = of_find_compatible_node(NULL, NULL, lookup->compatible); if (np) { lookup->name = (char *)np->name; - if (lookup->phys_addr) + if (lookup->phys_addr) { + of_node_put(np); continue; + } if (!of_address_to_resource(np, 0, &res)) lookup->phys_addr = res.start; + of_node_put(np); } } + of_node_put(root); + return 0; } diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c index 7174e9abbb1b..777b515c52c8 100644 --- a/arch/mips/pic32/pic32mzda/time.c +++ b/arch/mips/pic32/pic32mzda/time.c @@ -32,6 +32,9 @@ static unsigned int pic32_xlate_core_timer_irq(void) goto default_map; irq = irq_of_parse_and_map(node, 0); + + of_node_put(node); + if (!irq) goto default_map; -- cgit v1.2.3 From db30dc1a5226eb74d52f748989e9a06451333678 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Fri, 17 Jun 2022 13:18:05 +0100 Subject: mips: dts: ingenic: Add TCU clock to x1000/x1830 tcu device node This clock is a gate for the TCU hardware block on these SoCs, but it wasn't included in the device tree since the ingenic-tcu driver erroneously did not request it. Reviewed-by: Paul Cercueil Signed-off-by: Aidan MacDonald Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ingenic/x1000.dtsi | 5 +++-- arch/mips/boot/dts/ingenic/x1830.dtsi | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index b0a034b468bb..42e69664efd9 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -111,8 +111,9 @@ clocks = <&cgu X1000_CLK_RTCLK>, <&cgu X1000_CLK_EXCLK>, - <&cgu X1000_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1000_CLK_PCLK>, + <&cgu X1000_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi index dbf21afaccb1..65a5da71c199 100644 --- a/arch/mips/boot/dts/ingenic/x1830.dtsi +++ b/arch/mips/boot/dts/ingenic/x1830.dtsi @@ -104,8 +104,9 @@ clocks = <&cgu X1830_CLK_RTCLK>, <&cgu X1830_CLK_EXCLK>, - <&cgu X1830_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1830_CLK_PCLK>, + <&cgu X1830_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; -- cgit v1.2.3 From 82c7863ed95d0914f02c7c8c011200a763bc6725 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Jun 2022 00:42:24 -0700 Subject: f2fs: do not count ENOENT for error case Otherwise, we can get a wrong cp_error mark. Cc: Fixes: a7b8618aa2f0 ("f2fs: avoid infinite loop to flush node pages") Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 836c79a20afc..cf6f7fc83c08 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1450,7 +1450,9 @@ page_hit: out_err: ClearPageUptodate(page); out_put_err: - f2fs_handle_page_eio(sbi, page->index, NODE); + /* ENOENT comes from read_node_page which is not an error. */ + if (err != -ENOENT) + f2fs_handle_page_eio(sbi, page->index, NODE); f2fs_put_page(page, 1); return ERR_PTR(err); } -- cgit v1.2.3 From 2645672ffe21f0a1c139bfbc05ad30fd4e4f2583 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Jun 2022 10:03:57 -0600 Subject: block: pop cached rq before potentially blocking rq_qos_throttle() If rq_qos_throttle() ends up blocking, then we will have invalidated and flushed our current plug. Since blk_mq_get_cached_request() hasn't popped the cached request off the plug list just yet, we end holding a pointer to a request that is no longer valid. This insta-crashes with rq->mq_hctx being NULL in the validity checks just after. Pop the request off the cached list before doing rq_qos_throttle() to avoid using a potentially stale request. Fixes: 0a5aa8d161d1 ("block: fix blk_mq_attempt_bio_merge and rq_qos_throttle protection") Reported-by: Dylan Yudaken Tested-by: Dylan Yudaken Signed-off-by: Jens Axboe --- block/blk-mq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 33145ba52c96..93d9d60980fb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2765,15 +2765,20 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, return NULL; } - rq_qos_throttle(q, *bio); - if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type) return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) return NULL; - rq->cmd_flags = (*bio)->bi_opf; + /* + * If any qos ->throttle() end up blocking, we will have flushed the + * plug and hence killed the cached_rq list as well. Pop this entry + * before we throttle. + */ plug->cached_rq = rq_list_next(rq); + rq_qos_throttle(q, *bio); + + rq->cmd_flags = (*bio)->bi_opf; INIT_LIST_HEAD(&rq->queuelist); return rq; } -- cgit v1.2.3 From 9ae6e8b1c9bbf6874163d1243e393137313762b7 Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Tue, 21 Jun 2022 15:24:03 +0300 Subject: dm era: commit metadata in postsuspend after worker stops During postsuspend dm-era does the following: 1. Archives the current era 2. Commits the metadata, as part of the RPC call for archiving the current era 3. Stops the worker Until the worker stops, it might write to the metadata again. Moreover, these writes are not flushed to disk immediately, but are cached by the dm-bufio client, which writes them back asynchronously. As a result, the committed metadata of a suspended dm-era device might not be consistent with the in-core metadata. In some cases, this can result in the corruption of the on-disk metadata. Suppose the following sequence of events: 1. Load a new table, e.g. a snapshot-origin table, to a device with a dm-era table 2. Suspend the device 3. dm-era commits its metadata, but the worker does a few more metadata writes until it stops, as part of digesting an archived writeset 4. These writes are cached by the dm-bufio client 5. Load the dm-era table to another device. 6. The new instance of the dm-era target loads the committed, on-disk metadata, which don't include the extra writes done by the worker after the metadata commit. 7. Resume the new device 8. The new dm-era target instance starts using the metadata 9. Resume the original device 10. The destructor of the old dm-era target instance is called and destroys the dm-bufio client, which results in flushing the cached writes to disk 11. These writes might overwrite the writes done by the new dm-era instance, hence corrupting its metadata. Fix this by committing the metadata after the worker stops running. stop_worker uses flush_workqueue to flush the current work. However, the work item may re-queue itself and flush_workqueue doesn't wait for re-queued works to finish. This could result in the worker changing the metadata after they have been committed, or writing to the metadata concurrently with the commit in the postsuspend thread. Use drain_workqueue instead, which waits until the work and all re-queued works finish. Fixes: eec40579d8487 ("dm: add era target") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Nikos Tsironis Signed-off-by: Mike Snitzer --- drivers/md/dm-era-target.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 1f6bf152b3c7..e92c1afc3677 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1400,7 +1400,7 @@ static void start_worker(struct era *era) static void stop_worker(struct era *era) { atomic_set(&era->suspended, 1); - flush_workqueue(era->wq); + drain_workqueue(era->wq); } /*---------------------------------------------------------------- @@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_target *ti) } stop_worker(era); + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); + /* FIXME: fail mode */ + } } static int era_preresume(struct dm_target *ti) -- cgit v1.2.3 From 78ccef91234ba331c04d71f3ecb1377451d21056 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 21 Jun 2022 13:37:06 -0400 Subject: dm: do not return early from dm_io_complete if BLK_STS_AGAIN without polling Commit 5291984004edf ("dm: fix bio polling to handle possibile BLK_STS_AGAIN") inadvertently introduced an early return from dm_io_complete() without first queueing the bio to DM if BLK_STS_AGAIN occurs and bio-polling is _not_ being used. Fix this by only returning early from dm_io_complete() if the bio has first been properly queued to DM. Otherwise, the bio will never finish via bio_endio. Fixes: 5291984004edf ("dm: fix bio polling to handle possibile BLK_STS_AGAIN") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b6b25d319ef7..9ede55278eec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -939,9 +939,11 @@ static void dm_io_complete(struct dm_io *io) if (io_error == BLK_STS_AGAIN) { /* io_uring doesn't handle BLK_STS_AGAIN (yet) */ queue_io(md, bio); + return; } } - return; + if (io_error == BLK_STS_DM_REQUEUE) + return; } if (bio_is_flush_with_data(bio)) { -- cgit v1.2.3 From 202773260023b56e868d09d13d3a417028f1ff5b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 17 Jun 2022 15:24:02 +0300 Subject: PM: hibernate: Use kernel_can_power_off() Use new kernel_can_power_off() API instead of legacy pm_power_off global variable to fix regressed hibernation to disk where machine no longer powers off when it should because ACPI power driver transitioned to the new sys-off based API and it doesn't use pm_power_off anymore. Fixes: 98f30d0ecf79 ("ACPI: power: Switch to sys-off handler API") Tested-by: Ken Moffat Reported-by: Ken Moffat Signed-off-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 20a66bf9f465..89c71fce225d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -665,7 +665,7 @@ static void power_down(void) hibernation_platform_enter(); fallthrough; case HIBERNATION_SHUTDOWN: - if (pm_power_off) + if (kernel_can_power_off()) kernel_power_off(); break; } -- cgit v1.2.3 From 3748d2185ac4c2c6f80989672253aad909ecaf95 Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 21 Jun 2022 23:41:25 +0800 Subject: mips: lantiq: Add missing of_node_put() in irq.c In icu_of_init(), of_find_compatible_node() will return a node pointer with refcount incremented. We should use of_node_put() when it is not used anymore. Signed-off-by: Liang He Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index b732495f138a..20622bf0a9b3 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -408,6 +408,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) if (!ltq_eiu_membase) panic("Failed to remap eiu memory"); } + of_node_put(eiu_node); return 0; } -- cgit v1.2.3 From c487a5ad48831afa6784b368ec40d0ee50f2fe1b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 22 Jun 2022 00:00:35 +0100 Subject: io_uring: fail links when poll fails Don't forget to cancel all linked requests of poll request when __io_arm_poll_handler() failed. Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a78aad962460f9fdfe4aa4c0b62425c88f9415bc.1655852245.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index dffa85d4dc7a..d5ea3c6167b5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7405,6 +7405,8 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) ipt.pt._qproc = io_poll_queue_proc; ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); + if (!ret && ipt.error) + req_set_fail(req); ret = ret ?: ipt.error; if (ret) __io_req_complete(req, issue_flags, ret, 0); -- cgit v1.2.3 From 9d2ad2947a53abf5e5e6527a9eeed50a3a4cbc72 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 22 Jun 2022 00:00:36 +0100 Subject: io_uring: fix wrong arm_poll error handling Leaving ip.error set when a request was punted to task_work execution is problematic, don't forget to clear it. Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a6c84ef4182c6962380aebe11b35bdcb25b0ccfb.1655852245.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5ea3c6167b5..cb719a53b8bd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7145,6 +7145,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (unlikely(ipt->error || !ipt->nr_entries)) { poll->events |= EPOLLONESHOT; req->apoll_events |= EPOLLONESHOT; + ipt->error = 0; } __io_poll_execute(req, mask, poll->events); return 0; -- cgit v1.2.3 From c0737fa9a5a5cf5a053bcc983f72d58919b997c6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 22 Jun 2022 00:00:37 +0100 Subject: io_uring: fix double poll leak on repolling We have re-polling for partial IO, so a request can be polled twice. If it used two poll entries the first time then on the second io_arm_poll_handler() it will find the old apoll entry and NULL kmalloc()'ed second entry, i.e. apoll->double_poll, so leaking it. Fixes: 10c873334feba ("io_uring: allow re-poll if we made progress") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/fee2452494222ecc7f1f88c8fb659baef971414a.1655852245.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb719a53b8bd..5c95755619e2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7208,6 +7208,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) mask |= EPOLLEXCLUSIVE; if (req->flags & REQ_F_POLLED) { apoll = req->apoll; + kfree(apoll->double_poll); } else if (!(issue_flags & IO_URING_F_UNLOCKED) && !list_empty(&ctx->apoll_cache)) { apoll = list_first_entry(&ctx->apoll_cache, struct async_poll, -- cgit v1.2.3 From ae60aac59a9ad8ab64a4b07de509a534a75b6bac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Jun 2022 10:58:55 +0200 Subject: USB: serial: pl2303: add support for more HXN (G) types Add support for further HXN (G) type devices (GT variant, GL variant, GS variant and GR) and document the bcdDevice mapping. Note that the TA and TB types use the same bcdDevice as some GT and GE variants, respectively, but that the HX status request can be used to determine which is which. Also note that we currently do not distinguish between the various HXN (G) types in the driver but that this may change eventually (e.g. when adding GPIO support). Reported-by: Charles Yeh Link: https://lore.kernel.org/r/YrF77b9DdeumUAee@hovoldconsulting.com Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 3506c47e1eef..40b1ab3d284d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -436,22 +436,27 @@ static int pl2303_detect_type(struct usb_serial *serial) break; case 0x200: switch (bcdDevice) { - case 0x100: + case 0x100: /* GC */ case 0x105: + return TYPE_HXN; + case 0x300: /* GT / TA */ + if (pl2303_supports_hx_status(serial)) + return TYPE_TA; + fallthrough; case 0x305: + case 0x400: /* GL */ case 0x405: + return TYPE_HXN; + case 0x500: /* GE / TB */ + if (pl2303_supports_hx_status(serial)) + return TYPE_TB; + fallthrough; + case 0x505: + case 0x600: /* GS */ case 0x605: - /* - * Assume it's an HXN-type if the device doesn't - * support the old read request value. - */ - if (!pl2303_supports_hx_status(serial)) - return TYPE_HXN; - break; - case 0x300: - return TYPE_TA; - case 0x500: - return TYPE_TB; + case 0x700: /* GR */ + case 0x705: + return TYPE_HXN; } break; } -- cgit v1.2.3 From 33b29dbb39bcbd0a96e440646396bbf670b914fa Mon Sep 17 00:00:00 2001 From: Yonglin Tan Date: Tue, 21 Jun 2022 20:37:53 +0800 Subject: USB: serial: option: add Quectel EM05-G modem The EM05-G modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030a Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030a Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Yonglin Tan Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 222b1e3d45a6..61d34886c706 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -252,6 +252,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 @@ -1134,6 +1135,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, -- cgit v1.2.3 From 5f940e528da6bce52a86fbdf881b76f60240aeaf Mon Sep 17 00:00:00 2001 From: Saud Farooqui Date: Thu, 9 Jun 2022 23:50:31 +0500 Subject: drm/vc4: hdmi: Fixed possible integer overflow Multiplying ints and saving it in unsigned long long could lead to integer overflow before being type casted to unsigned long long. Addresses-Coverity: 1505113: Unintentional integer overflow. Signed-off-by: Saud Farooqui Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/PA4P189MB1421E63C0FF3EBF234A80AB38BA79@PA4P189MB1421.EURP189.PROD.OUTLOOK.COM --- drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 823d812f4982..ce9d16666d91 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1481,7 +1481,7 @@ vc4_hdmi_encoder_compute_mode_clock(const struct drm_display_mode *mode, unsigned int bpc, enum vc4_hdmi_output_format fmt) { - unsigned long long clock = mode->clock * 1000; + unsigned long long clock = mode->clock * 1000ULL; if (mode->flags & DRM_MODE_FLAG_DBLCLK) clock = clock * 2; -- cgit v1.2.3 From f5aa16807aa4f99293044944590dde81364f434f Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 20 Jun 2022 20:13:33 +0200 Subject: drm/sun4i: Add DMA mask and segment size Kernel occasionally complains that there is mismatch in segment size when trying to render HW decoded videos and rendering them directly with sun4i DRM driver. Following message can be observed on H6 SoC: [ 184.298308] ------------[ cut here ]------------ [ 184.298326] DMA-API: sun4i-drm display-engine: mapping sg segment longer than device claims to support [len=6144000] [max=65536] [ 184.298364] WARNING: CPU: 1 PID: 382 at kernel/dma/debug.c:1162 debug_dma_map_sg+0x2b0/0x350 [ 184.322997] CPU: 1 PID: 382 Comm: ffmpeg Not tainted 5.19.0-rc1+ #1331 [ 184.329533] Hardware name: Tanix TX6 (DT) [ 184.333544] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 184.340512] pc : debug_dma_map_sg+0x2b0/0x350 [ 184.344882] lr : debug_dma_map_sg+0x2b0/0x350 [ 184.349250] sp : ffff800009f33a50 [ 184.352567] x29: ffff800009f33a50 x28: 0000000000010000 x27: ffff000001b86c00 [ 184.359725] x26: ffffffffffffffff x25: ffff000005d8cc80 x24: 0000000000000000 [ 184.366879] x23: ffff80000939ab18 x22: 0000000000000001 x21: 0000000000000001 [ 184.374031] x20: 0000000000000000 x19: ffff0000018a7410 x18: ffffffffffffffff [ 184.381186] x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff [ 184.388338] x14: 0000000000000001 x13: ffff800009534e86 x12: 6f70707573206f74 [ 184.395493] x11: 20736d69616c6320 x10: 000000000000000a x9 : 0000000000010000 [ 184.402647] x8 : ffff8000093b6d40 x7 : ffff800009f33850 x6 : 000000000000000c [ 184.409800] x5 : ffff0000bf997940 x4 : 0000000000000000 x3 : 0000000000000027 [ 184.416953] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000003960e80 [ 184.424106] Call trace: [ 184.426556] debug_dma_map_sg+0x2b0/0x350 [ 184.430580] __dma_map_sg_attrs+0xa0/0x110 [ 184.434687] dma_map_sgtable+0x28/0x4c [ 184.438447] vb2_dc_dmabuf_ops_map+0x60/0xcc [ 184.442729] __map_dma_buf+0x2c/0xd4 [ 184.446321] dma_buf_map_attachment+0xa0/0x130 [ 184.450777] drm_gem_prime_import_dev+0x7c/0x18c [ 184.455410] drm_gem_prime_fd_to_handle+0x1b8/0x214 [ 184.460300] drm_prime_fd_to_handle_ioctl+0x2c/0x40 [ 184.465190] drm_ioctl_kernel+0xc4/0x174 [ 184.469123] drm_ioctl+0x204/0x420 [ 184.472534] __arm64_sys_ioctl+0xac/0xf0 [ 184.476474] invoke_syscall+0x48/0x114 [ 184.480240] el0_svc_common.constprop.0+0x44/0xec [ 184.484956] do_el0_svc+0x2c/0xc0 [ 184.488283] el0_svc+0x2c/0x84 [ 184.491354] el0t_64_sync_handler+0x11c/0x150 [ 184.495723] el0t_64_sync+0x18c/0x190 [ 184.499397] ---[ end trace 0000000000000000 ]--- Fix that by setting DMA mask and segment size. Signed-off-by: Jernej Skrabec Reviewed-by: Samuel Holland Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220620181333.650301-1-jernej.skrabec@gmail.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 8841dba989ee..6eb1aabdb161 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -369,6 +370,13 @@ static int sun4i_drv_probe(struct platform_device *pdev) INIT_KFIFO(list.fifo); + /* + * DE2 and DE3 cores actually supports 40-bit addresses, but + * driver does not. + */ + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + for (i = 0;; i++) { struct device_node *pipeline = of_parse_phandle(np, "allwinner,pipelines", -- cgit v1.2.3 From f3eac426657d985b97c92fa5f7ae1d43f04721f3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 21 Jun 2022 16:08:49 +0200 Subject: powerpc/powernv: wire up rng during setup_arch The platform's RNG must be available before random_init() in order to be useful for initial seeding, which in turn means that it needs to be called from setup_arch(), rather than from an init call. Complicating things, however, is that POWER8 systems need some per-cpu state and kmalloc, which isn't available at this stage. So we split things up into an early phase and a later opportunistic phase. This commit also removes some noisy log messages that don't add much. Fixes: a4da0d50b2a0 ("powerpc: Implement arch_get_random_long/int() for powernv") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Christophe Leroy [mpe: Add of_node_put(), use pnv naming, minor change log editing] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220621140849.127227-1-Jason@zx2c4.com --- arch/powerpc/platforms/powernv/powernv.h | 2 ++ arch/powerpc/platforms/powernv/rng.c | 52 ++++++++++++++++++++++---------- arch/powerpc/platforms/powernv/setup.c | 2 ++ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index e297bf4abfcb..866efdc103fd 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -42,4 +42,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 __init memcons_get_size(struct memcons *mc); struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index e3d44b36ae98..463c78c52cc5 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include #include #include +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -98,9 +98,6 @@ static int __init initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -163,32 +160,55 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; + + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); + if (rng_create(dn)) continue; - } - /* Create devices for hwrng driver */ of_platform_device_create(dn, NULL, NULL); } - initialise_darn(); + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + unsigned long v; + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 824c3ad7a0fa..dac545aa0308 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -203,6 +203,8 @@ static void __init pnv_setup_arch(void) pnv_check_guarded_cores(); /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) -- cgit v1.2.3 From 485037ae9a095491beb7f893c909a76cc4f9d1e7 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Mon, 20 Jun 2022 21:05:56 +0100 Subject: regmap-irq: Fix a bug in regmap_irq_enable() for type_in_mask chips When enabling a type_in_mask irq, the type_buf contents must be AND'd with the mask of the IRQ we're enabling to avoid enabling other IRQs by accident, which can happen if several type_in_mask irqs share a mask register. Fixes: bc998a730367 ("regmap: irq: handle HW using separate rising/falling edge interrupts") Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220620200644.1961936-2-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 400c7412a7dc..4f785bc7981c 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -252,6 +252,7 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + unsigned int reg = irq_data->reg_offset / map->reg_stride; unsigned int mask, type; type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; @@ -268,14 +269,14 @@ static void regmap_irq_enable(struct irq_data *data) * at the corresponding offset in regmap_irq_set_type(). */ if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) -- cgit v1.2.3 From 3f05010f243be06478a9b11cfce0ce994f5a0890 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Mon, 20 Jun 2022 21:05:57 +0100 Subject: regmap-irq: Fix offset/index mismatch in read_sub_irq_data() We need to divide the sub-irq status register offset by register stride to get an index for the status buffer to avoid an out of bounds write when the register stride is greater than 1. Fixes: a2d21848d921 ("regmap: regmap-irq: Add main status register support") Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220620200644.1961936-3-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4f785bc7981c..a6db605707b0 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -387,6 +387,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, subreg = &chip->sub_reg_offsets[b]; for (i = 0; i < subreg->num_regs; i++) { unsigned int offset = subreg->offset[i]; + unsigned int index = offset / map->reg_stride; if (chip->not_fixed_stride) ret = regmap_read(map, @@ -395,7 +396,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, else ret = regmap_read(map, chip->status_base + offset, - &data->status_buf[offset]); + &data->status_buf[index]); if (ret) break; -- cgit v1.2.3 From 9f7d09fe23a0112c08d2326d9116fccb5a912660 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 17 Jun 2022 10:01:07 +0900 Subject: iommu/ipmmu-vmsa: Fix compatible for rcar-gen4 Fix compatible string for R-Car Gen4. Fixes: ae684caf465b ("iommu/ipmmu-vmsa: Add support for R-Car Gen4") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220617010107.3229784-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 8fdb84b3642b..1d42084d0276 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -987,7 +987,7 @@ static const struct of_device_id ipmmu_of_ids[] = { .compatible = "renesas,ipmmu-r8a779a0", .data = &ipmmu_features_rcar_gen4, }, { - .compatible = "renesas,rcar-gen4-ipmmu", + .compatible = "renesas,rcar-gen4-ipmmu-vmsa", .data = &ipmmu_features_rcar_gen4, }, { /* Terminator */ -- cgit v1.2.3 From 3026b5ca06fa872147f9726ca7c1f658bae71abf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Jun 2022 15:25:15 +0300 Subject: drm/vc4: fix error code in vc4_check_tex_size() The vc4_check_tex_size() function is supposed to return false on error but this error path accidentally returns -ENODEV (which means true). Fixes: 30f8c74ca9b7 ("drm/vc4: Warn if some v3d code is run on BCM2711") Signed-off-by: Dan Carpenter Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/YrMKK89/viQiaiAg@kili --- drivers/gpu/drm/vc4/vc4_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 833eb623d545..2feba55bcef7 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -170,7 +170,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, uint32_t utile_h = utile_height(cpp); if (WARN_ON_ONCE(vc4->is_vc5)) - return -ENODEV; + return false; /* The shaded vertex format stores signed 12.4 fixed point * (-2048,2047) offsets from the viewport center, so we should -- cgit v1.2.3 From 85016f66af8506cb601fd4f4fde23ed327a266be Mon Sep 17 00:00:00 2001 From: Saud Farooqui Date: Wed, 22 Jun 2022 13:59:17 +0500 Subject: drm/sun4i: Return if frontend is not present Added return statement in sun4i_layer_format_mod_supported() in case frontend is not present. Signed-off-by: Saud Farooqui Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/PA4P189MB1421E93EF5F8E8E00E71B7878BB29@PA4P189MB1421.EURP189.PROD.OUTLOOK.COM --- drivers/gpu/drm/sun4i/sun4i_layer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index 6d43080791a0..85fb9e800ddf 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -117,7 +117,7 @@ static bool sun4i_layer_format_mod_supported(struct drm_plane *plane, struct sun4i_layer *layer = plane_to_sun4i_layer(plane); if (IS_ERR_OR_NULL(layer->backend->frontend)) - sun4i_backend_format_is_supported(format, modifier); + return sun4i_backend_format_is_supported(format, modifier); return sun4i_backend_format_is_supported(format, modifier) || sun4i_frontend_format_is_supported(format, modifier); -- cgit v1.2.3 From bdc48fd571a7bb0aecb9d101d78b29a05f217c72 Mon Sep 17 00:00:00 2001 From: Jiang Jian Date: Tue, 21 Jun 2022 14:49:04 +0800 Subject: video: fbdev: omapfb: Align '*' in comment Consider '*' alignment in comments Signed-off-by: Jiang Jian Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/dss/hdmi_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi_phy.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi_phy.c index 6fbfeb01b315..170463a7e1f4 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi_phy.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi_phy.c @@ -143,7 +143,7 @@ int hdmi_phy_configure(struct hdmi_phy_data *phy, unsigned long hfbitclk, /* * In OMAP5+, the HFBITCLK must be divided by 2 before issuing the * HDMI_PHYPWRCMD_LDOON command. - */ + */ if (phy_feat->bist_ctrl) REG_FLD_MOD(phy->base, HDMI_TXPHY_BIST_CONTROL, 1, 11, 11); -- cgit v1.2.3 From cb5177336ecb07fe1c6804306fe8efc827643c64 Mon Sep 17 00:00:00 2001 From: Jiang Jian Date: Wed, 22 Jun 2022 02:55:38 +0800 Subject: video: fbdev: omap: Remove duplicate 'the' in comment Signed-off-by: Jiang Jian Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/sossi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap/sossi.c b/drivers/video/fbdev/omap/sossi.c index c90eb8ca58af..66aff6cd1df0 100644 --- a/drivers/video/fbdev/omap/sossi.c +++ b/drivers/video/fbdev/omap/sossi.c @@ -359,7 +359,7 @@ static void sossi_set_bits_per_cycle(int bpc) int bus_pick_count, bus_pick_width; /* - * We set explicitly the the bus_pick_count as well, although + * We set explicitly the bus_pick_count as well, although * with remapping/reordering disabled it will be calculated by HW * as (32 / bus_pick_width). */ -- cgit v1.2.3 From f15345a377c6ea9c7cc74f079616af8856aff37f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 19 May 2022 10:21:08 -0400 Subject: drm/amdgpu: Adjust logic around GTT size (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain GL unit tests for large textures can cause problems with the OOM killer since there is no way to link this memory to a process. This was originally mitigated (but not necessarily eliminated) by limiting the GTT size. The problem is this limit is often too low for many modern games so just make the limit 1/2 of system memory. The OOM accounting needs to be addressed, but we shouldn't prevent common 3D applications from being usable just to potentially mitigate that corner case. Set default GTT size to max(3G, 1/2 of system ram) by default. v2: drop previous logic and default to 3/4 of ram v3: default to half of ram to align with ttm v4: fix spelling in comment (Kent) Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1942 Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index be6f76a30ac6..3b4c19412625 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - /* Compute GTT size, either bsaed on 3/4th the size of RAM size + /* Compute GTT size, either based on 1/2 the size of RAM size * or whatever the user passed on module init */ if (amdgpu_gtt_size == -1) { struct sysinfo si; si_meminfo(&si); - gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->gmc.mc_vram_size), - ((uint64_t)si.totalram * si.mem_unit * 3/4)); - } - else + /* Certain GL unit tests for large textures can cause problems + * with the OOM killer since there is no way to link this memory + * to a process. This was originally mitigated (but not necessarily + * eliminated) by limiting the GTT size. The problem is this limit + * is often too low for many modern games so just make the limit 1/2 + * of system memory which aligns with TTM. The OOM accounting needs + * to be addressed, but we shouldn't prevent common 3D applications + * from being usable just to potentially mitigate that corner case. + */ + gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + (u64)si.totalram * si.mem_unit / 2); + } else { gtt_size = (uint64_t)amdgpu_gtt_size << 20; + } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); -- cgit v1.2.3 From 937e24b7f5595566a64e0f856ebab9147f2e4d1b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 15 Jun 2022 17:30:05 -0500 Subject: drm/amd: Revert "drm/amd/display: keep eDP Vdd on when eDP stream is already enabled" A variety of Lenovo machines with Rembrandt APUs and OLED panels have stopped showing the display at login. This behavior clears up after leaving it idle and moving the mouse or touching keyboard. It was bisected to be caused by commit 559e2655220d ("drm/amd/display: keep eDP Vdd on when eDP stream is already enabled"). Revert this commit to fix the issue. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2047 Reported-by: Aaron Ma Fixes: 559e2655220d ("drm/amd/display: keep eDP Vdd on when eDP stream is already enabled") Signed-off-by: Mario Limonciello Tested-by: Mark Pearson Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../amd/display/dc/dce110/dce110_hw_sequencer.c | 24 ++-------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7eff7811769d..5f2afa5b4814 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1766,29 +1766,9 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) break; } } - - /* - * TO-DO: So far the code logic below only addresses single eDP case. - * For dual eDP case, there are a few things that need to be - * implemented first: - * - * 1. Change the fastboot logic above, so eDP link[0 or 1]'s - * stream[0 or 1] will all be checked. - * - * 2. Change keep_edp_vdd_on to an array, and maintain keep_edp_vdd_on - * for each eDP. - * - * Once above 2 things are completed, we can then change the logic below - * correspondingly, so dual eDP case will be fully covered. - */ - - // We are trying to enable eDP, don't power down VDD if eDP stream is existing - if ((edp_stream_num == 1 && edp_streams[0] != NULL) || can_apply_edp_fast_boot) { + // We are trying to enable eDP, don't power down VDD + if (can_apply_edp_fast_boot) keep_edp_vdd_on = true; - DC_LOG_EVENT_LINK_TRAINING("Keep eDP Vdd on\n"); - } else { - DC_LOG_EVENT_LINK_TRAINING("No eDP stream enabled, turn eDP Vdd off\n"); - } } // Check seamless boot support -- cgit v1.2.3 From 235870f659687b48b12c28f9427e6ca39dcaa81e Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Fri, 10 Jun 2022 10:43:53 -0400 Subject: drm/amd/display: Fix DC warning at driver load [Why] Wrong index was checked for dcfclk_mhz, causing false warning. [How] Fix the assertion index. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: Qingqing Zhuo Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.18.x --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index fb4ae800e919..f4381725b210 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -550,7 +550,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params( if (!bw_params->clk_table.entries[i].dtbclk_mhz) bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; } - ASSERT(bw_params->clk_table.entries[i].dcfclk_mhz); + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; if (!bw_params->num_channels) -- cgit v1.2.3 From 98b02e9f002b21944176774cf420c4d674f6201c Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 7 Oct 2021 15:59:44 -0400 Subject: drm/amd/display: Fix typo in override_lane_settings [Why] The function currently skips overriding the drive settings of the first lane. [How] Change for loop to start at 0 instead of 1. Tested-by: Daniel Wheeler Reviewed-by: Wenjing Liu Acked-by: Alan Liu Signed-off-by: George Shen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cbc47aecd00f..d8eee89e63ce 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -944,7 +944,7 @@ static void override_lane_settings(const struct link_training_settings *lt_setti return; - for (lane = 1; lane < LANE_COUNT_DP_MAX; lane++) { + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { if (lt_settings->voltage_swing) lane_settings[lane].VOLTAGE_SWING = *lt_settings->voltage_swing; if (lt_settings->pre_emphasis) -- cgit v1.2.3 From e84131a88a8cdcd6fe9f234ed98e3f8ca049142b Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Jun 2022 01:21:27 +0000 Subject: amd/display/dc: Fix COLOR_ENCODING and COLOR_RANGE doing nothing for DCN20+ For DCN20 and above, the code that actually hooks up the provided input_color_space got lost at some point. Fixes COLOR_ENCODING and COLOR_RANGE doing nothing on DCN20+. Tested using Steam Remote Play Together + gamescope. Update other DCNs the same wasy DCN1.x was updates in commit a1e07ba89d49 ("drm/amd/display: Use plane->color_space for dpp if specified") Fixes: a1e07ba89d49 ("drm/amd/display: Use plane->color_space for dpp if specified") Signed-off-by: Joshua Ashton Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c | 3 +++ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c | 3 +++ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 970b65efeac1..eaa7032f0f1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -212,6 +212,9 @@ static void dpp2_cnv_setup ( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c index 8b6505b7dca8..f50ab961bc17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c @@ -153,6 +153,9 @@ static void dpp201_cnv_setup( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index ab3918c0a15b..0dcc07531643 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -294,6 +294,9 @@ static void dpp3_cnv_setup ( break; } + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + if (is_2bit == 1 && alpha_2bit_lut != NULL) { REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); -- cgit v1.2.3 From 9de74996a739bf0b7b5d8c260bd207ad6007442b Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 22 Jun 2022 12:36:36 -0500 Subject: smb3: use netname when available on secondary channels Some servers do not allow null netname contexts, which would cause multichannel to revert to single channel when mounting to some servers (e.g. Azure xSMB). The previous patch fixed that by avoiding incorrectly sending the netname context when there would be a null hostname sent in the netname context, while this patch fixes the null hostname for the secondary channel by using the hostname of the primary channel for the secondary channel. Fixes: 4c14d7043fede ("cifs: populate empty hostnames for extra channels") Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5e8c4737b183..12b4dddaedb0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -543,6 +543,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { char *pneg_ctxt; + char *hostname = NULL; unsigned int ctxt_len, neg_context_count; if (*total_len > 200) { @@ -574,9 +575,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += sizeof(struct smb2_posix_neg_context); pneg_ctxt += sizeof(struct smb2_posix_neg_context); - if (server->hostname && (server->hostname[0] != 0)) { + /* + * secondary channels don't have the hostname field populated + * use the hostname field in the primary channel instead + */ + hostname = CIFS_SERVER_IS_CHAN(server) ? + server->primary_server->hostname : server->hostname; + if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, - server->hostname); + hostname); *total_len += ctxt_len; pneg_ctxt += ctxt_len; neg_context_count = 4; -- cgit v1.2.3 From aa45dadd34e44fcd6a9df4b395bee5b5633b4cec Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sat, 1 Jan 2022 12:50:21 +0000 Subject: cifs: change iface_list from array to sorted linked list A server's published interface list can change over time, and needs to be updated. We've storing iface_list as a simple array, which makes it difficult to manipulate an existing list. With this change, iface_list is modified into a linked list of interfaces, which is kept sorted by speed. Also added a reference counter for an iface entry, so that each channel can maintain a backpointer to the iface and drop it easily when needed. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 12 ++-- fs/cifs/cifsglob.h | 54 +++++++++++++++- fs/cifs/connect.c | 6 +- fs/cifs/misc.c | 9 ++- fs/cifs/sess.c | 78 +++++++++++------------ fs/cifs/smb2ops.c | 171 +++++++++++++++++++++++++++------------------------ 6 files changed, 201 insertions(+), 129 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 1dd995efd5b8..2cfbac8bb965 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -162,6 +162,8 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr); else if (iface->sockaddr.ss_family == AF_INET6) seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr); + if (!iface->is_active) + seq_puts(m, "\t\t[for-cleanup]\n"); } static int cifs_debug_files_proc_show(struct seq_file *m, void *v) @@ -221,6 +223,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; + struct cifs_server_iface *iface; int c, i, j; seq_puts(m, @@ -456,11 +459,10 @@ skip_rdma: if (ses->iface_count) seq_printf(m, "\n\n\tServer interfaces: %zu", ses->iface_count); - for (j = 0; j < ses->iface_count; j++) { - struct cifs_server_iface *iface; - - iface = &ses->iface_list[j]; - seq_printf(m, "\n\t%d)", j+1); + j = 0; + list_for_each_entry(iface, &ses->iface_list, + iface_head) { + seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e7737166e5b8..7fc2addc95a3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -933,15 +933,67 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) #endif struct cifs_server_iface { + struct list_head iface_head; + struct kref refcount; size_t speed; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; + unsigned int is_active : 1; /* unset if non existent */ struct sockaddr_storage sockaddr; }; +/* release iface when last ref is dropped */ +static inline void +release_iface(struct kref *ref) +{ + struct cifs_server_iface *iface = container_of(ref, + struct cifs_server_iface, + refcount); + list_del_init(&iface->iface_head); + kfree(iface); +} + +/* + * compare two interfaces a and b + * return 0 if everything matches. + * return 1 if a has higher link speed, or rdma capable, or rss capable + * return -1 otherwise. + */ +static inline int +iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) +{ + int cmp_ret = 0; + + WARN_ON(!a || !b); + if (a->speed == b->speed) { + if (a->rdma_capable == b->rdma_capable) { + if (a->rss_capable == b->rss_capable) { + cmp_ret = memcmp(&a->sockaddr, &b->sockaddr, + sizeof(a->sockaddr)); + if (!cmp_ret) + return 0; + else if (cmp_ret > 0) + return 1; + else + return -1; + } else if (a->rss_capable > b->rss_capable) + return 1; + else + return -1; + } else if (a->rdma_capable > b->rdma_capable) + return 1; + else + return -1; + } else if (a->speed > b->speed) + return 1; + else + return -1; +} + struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; + struct cifs_server_iface *iface; /* interface in use */ __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; @@ -993,7 +1045,7 @@ struct cifs_ses { */ spinlock_t iface_lock; /* ========= begin: protected by iface_lock ======== */ - struct cifs_server_iface *iface_list; + struct list_head iface_list; size_t iface_count; unsigned long iface_last_update; /* jiffies */ /* ========= end: protected by iface_lock ======== */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1849e3411487..248fe1805c17 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1894,9 +1894,11 @@ void cifs_put_smb_ses(struct cifs_ses *ses) int i; for (i = 1; i < chan_count; i++) { - spin_unlock(&ses->chan_lock); + if (ses->chans[i].iface) { + kref_put(&ses->chans[i].iface->refcount, release_iface); + ses->chans[i].iface = NULL; + } cifs_put_tcp_session(ses->chans[i].server, 0); - spin_lock(&ses->chan_lock); ses->chans[i].server = NULL; } } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c69e1240d730..0e84e6fcf8ab 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -75,6 +75,7 @@ sesInfoAlloc(void) INIT_LIST_HEAD(&ret_buf->tcon_list); mutex_init(&ret_buf->session_mutex); spin_lock_init(&ret_buf->iface_lock); + INIT_LIST_HEAD(&ret_buf->iface_list); spin_lock_init(&ret_buf->chan_lock); } return ret_buf; @@ -83,6 +84,8 @@ sesInfoAlloc(void) void sesInfoFree(struct cifs_ses *buf_to_free) { + struct cifs_server_iface *iface = NULL, *niface = NULL; + if (buf_to_free == NULL) { cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n"); return; @@ -96,7 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); kfree_sensitive(buf_to_free->auth_key.response); - kfree(buf_to_free->iface_list); + spin_lock(&buf_to_free->iface_lock); + list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list, + iface_head) + kref_put(&iface->refcount, release_iface); + spin_unlock(&buf_to_free->iface_lock); kfree_sensitive(buf_to_free); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d417de354d9d..d7b6e5687df2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -58,7 +58,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { - if (is_server_using_iface(ses->chans[i].server, iface)) { + if (ses->chans[i].iface == iface) { spin_unlock(&ses->chan_lock); return true; } @@ -151,11 +151,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { int old_chan_count, new_chan_count; int left; - int i = 0; int rc = 0; int tries = 0; - struct cifs_server_iface *ifaces = NULL; - size_t iface_count; + struct cifs_server_iface *iface = NULL, *niface = NULL; spin_lock(&ses->chan_lock); @@ -184,33 +182,17 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Make a copy of the iface list at the time and use that - * instead so as to not hold the iface spinlock for opening - * channels - */ - spin_lock(&ses->iface_lock); - iface_count = ses->iface_count; - if (iface_count <= 0) { - spin_unlock(&ses->iface_lock); - cifs_dbg(VFS, "no iface list available to open channels\n"); - return 0; - } - ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces), - GFP_ATOMIC); - if (!ifaces) { - spin_unlock(&ses->iface_lock); - return 0; - } - spin_unlock(&ses->iface_lock); - /* * Keep connecting to same, fastest, iface for all channels as * long as its RSS. Try next fastest one if not RSS or channel * creation fails. */ + spin_lock(&ses->iface_lock); + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + spin_unlock(&ses->iface_lock); + while (left > 0) { - struct cifs_server_iface *iface; tries++; if (tries > 3*ses->chan_max) { @@ -219,27 +201,46 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) break; } - iface = &ifaces[i]; - if (is_ses_using_iface(ses, iface) && !iface->rss_capable) { - i = (i+1) % iface_count; - continue; + spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + break; } - rc = cifs_ses_add_channel(cifs_sb, ses, iface); - if (rc) { - cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n", - i, rc); - i = (i+1) % iface_count; - continue; + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, + iface_head) { + /* skip ifaces that are unusable */ + if (!iface->is_active || + (is_ses_using_iface(ses, iface) && + !iface->rss_capable)) { + continue; + } + + /* take ref before unlock */ + kref_get(&iface->refcount); + + spin_unlock(&ses->iface_lock); + rc = cifs_ses_add_channel(cifs_sb, ses, iface); + spin_lock(&ses->iface_lock); + + if (rc) { + cifs_dbg(VFS, "failed to open extra channel on iface:%pIS rc=%d\n", + &iface->sockaddr, + rc); + kref_put(&iface->refcount, release_iface); + continue; + } + + cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + &iface->sockaddr); + break; } + spin_unlock(&ses->iface_lock); - cifs_dbg(FYI, "successfully opened new channel on iface#%d\n", - i); left--; new_chan_count++; } - kfree(ifaces); return new_chan_count - old_chan_count; } @@ -355,6 +356,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, spin_unlock(&ses->chan_lock); goto out; } + chan->iface = iface; ses->chan_count++; atomic_set(&ses->chan_seq, 0); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 8543cafdfd34..db4ccf1d235e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -512,73 +512,41 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) static int parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, size_t buf_len, - struct cifs_server_iface **iface_list, - size_t *iface_count) + struct cifs_ses *ses) { struct network_interface_info_ioctl_rsp *p; struct sockaddr_in *addr4; struct sockaddr_in6 *addr6; struct iface_info_ipv4 *p4; struct iface_info_ipv6 *p6; - struct cifs_server_iface *info; + struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL; + struct cifs_server_iface tmp_iface; ssize_t bytes_left; size_t next = 0; int nb_iface = 0; - int rc = 0; - - *iface_list = NULL; - *iface_count = 0; - - /* - * Fist pass: count and sanity check - */ + int rc = 0, ret = 0; bytes_left = buf_len; p = buf; - while (bytes_left >= sizeof(*p)) { - nb_iface++; - next = le32_to_cpu(p->Next); - if (!next) { - bytes_left -= sizeof(*p); - break; - } - p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); - bytes_left -= next; - } - - if (!nb_iface) { - cifs_dbg(VFS, "%s: malformed interface info\n", __func__); - rc = -EINVAL; - goto out; - } - - /* Azure rounds the buffer size up 8, to a 16 byte boundary */ - if ((bytes_left > 8) || p->Next) - cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); - + spin_lock(&ses->iface_lock); /* - * Second pass: extract info to internal structure + * Go through iface_list and do kref_put to remove + * any unused ifaces. ifaces in use will be removed + * when the last user calls a kref_put on it */ - - *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL); - if (!*iface_list) { - rc = -ENOMEM; - goto out; + list_for_each_entry_safe(iface, niface, &ses->iface_list, + iface_head) { + iface->is_active = 0; + kref_put(&iface->refcount, release_iface); } + spin_unlock(&ses->iface_lock); - info = *iface_list; - bytes_left = buf_len; - p = buf; while (bytes_left >= sizeof(*p)) { - info->speed = le64_to_cpu(p->LinkSpeed); - info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; - info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0; - - cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count); - cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); - cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__, - le32_to_cpu(p->Capability)); + memset(&tmp_iface, 0, sizeof(tmp_iface)); + tmp_iface.speed = le64_to_cpu(p->LinkSpeed); + tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; + tmp_iface.rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0; switch (p->Family) { /* @@ -587,7 +555,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, * conversion explicit in case either one changes. */ case INTERNETWORK: - addr4 = (struct sockaddr_in *)&info->sockaddr; + addr4 = (struct sockaddr_in *)&tmp_iface.sockaddr; p4 = (struct iface_info_ipv4 *)p->Buffer; addr4->sin_family = AF_INET; memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); @@ -599,7 +567,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, &addr4->sin_addr); break; case INTERNETWORKV6: - addr6 = (struct sockaddr_in6 *)&info->sockaddr; + addr6 = (struct sockaddr_in6 *)&tmp_iface.sockaddr; p6 = (struct iface_info_ipv6 *)p->Buffer; addr6->sin6_family = AF_INET6; memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16); @@ -619,36 +587,88 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto next_iface; } - (*iface_count)++; - info++; + /* + * The iface_list is assumed to be sorted by speed. + * Check if the new interface exists in that list. + * NEVER change iface. it could be in use. + * Add a new one instead + */ + spin_lock(&ses->iface_lock); + iface = niface = NULL; + list_for_each_entry_safe(iface, niface, &ses->iface_list, + iface_head) { + ret = iface_cmp(iface, &tmp_iface); + if (!ret) { + /* just get a ref so that it doesn't get picked/freed */ + iface->is_active = 1; + kref_get(&iface->refcount); + spin_unlock(&ses->iface_lock); + goto next_iface; + } else if (ret < 0) { + /* all remaining ifaces are slower */ + kref_get(&iface->refcount); + break; + } + } + spin_unlock(&ses->iface_lock); + + /* no match. insert the entry in the list */ + info = kmalloc(sizeof(struct cifs_server_iface), + GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto out; + } + memcpy(info, &tmp_iface, sizeof(tmp_iface)); + + /* add this new entry to the list */ + kref_init(&info->refcount); + info->is_active = 1; + + cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, ses->iface_count); + cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); + cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__, + le32_to_cpu(p->Capability)); + + spin_lock(&ses->iface_lock); + if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_add_tail(&info->iface_head, &iface->iface_head); + kref_put(&iface->refcount, release_iface); + } else + list_add_tail(&info->iface_head, &ses->iface_list); + spin_unlock(&ses->iface_lock); + + ses->iface_count++; + ses->iface_last_update = jiffies; next_iface: + nb_iface++; next = le32_to_cpu(p->Next); - if (!next) + if (!next) { + bytes_left -= sizeof(*p); break; + } p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); bytes_left -= next; } - if (!*iface_count) { + if (!nb_iface) { + cifs_dbg(VFS, "%s: malformed interface info\n", __func__); rc = -EINVAL; goto out; } -out: - if (rc) { - kfree(*iface_list); - *iface_count = 0; - *iface_list = NULL; - } - return rc; -} + /* Azure rounds the buffer size up 8, to a 16 byte boundary */ + if ((bytes_left > 8) || p->Next) + cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); -static int compare_iface(const void *ia, const void *ib) -{ - const struct cifs_server_iface *a = (struct cifs_server_iface *)ia; - const struct cifs_server_iface *b = (struct cifs_server_iface *)ib; - return a->speed == b->speed ? 0 : (a->speed > b->speed ? -1 : 1); + if (!ses->iface_count) { + rc = -EINVAL; + goto out; + } + +out: + return rc; } static int @@ -657,8 +677,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) int rc; unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; - struct cifs_server_iface *iface_list; - size_t iface_count; struct cifs_ses *ses = tcon->ses; rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, @@ -674,21 +692,10 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) goto out; } - rc = parse_server_interfaces(out_buf, ret_data_len, - &iface_list, &iface_count); + rc = parse_server_interfaces(out_buf, ret_data_len, ses); if (rc) goto out; - /* sort interfaces from fastest to slowest */ - sort(iface_list, iface_count, sizeof(*iface_list), compare_iface, NULL); - - spin_lock(&ses->iface_lock); - kfree(ses->iface_list); - ses->iface_list = iface_list; - ses->iface_count = iface_count; - ses->iface_last_update = jiffies; - spin_unlock(&ses->iface_lock); - out: kfree(out_buf); return rc; -- cgit v1.2.3 From b54034a73baf9fe31fb3f218c17bd5308a27a1ca Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 3 Jan 2022 08:47:30 +0000 Subject: cifs: during reconnect, update interface if necessary Going forward, the plan is to periodically query the server for it's interfaces (when multichannel is enabled). This change allows checking for inactive interfaces during reconnect, and reconnect to a new interface if necessary. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 5 ++++ fs/cifs/connect.c | 4 +++ fs/cifs/sess.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3b7366ec03c7..00c4a8aa2649 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -636,6 +636,11 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, bool cifs_chan_needs_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server); +bool +cifs_chan_is_iface_active(struct cifs_ses *ses, + struct TCP_Server_Info *server); +int +cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); void extract_unc_hostname(const char *unc, const char **h, size_t *len); int copy_path_name(char *dst, const char *src); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 248fe1805c17..d8aae257649f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -232,6 +232,10 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + /* check if iface is still active */ + if (!cifs_chan_is_iface_active(ses, server)) + cifs_chan_update_iface(ses, server); + spin_lock(&ses->chan_lock); if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) goto next_session; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d7b6e5687df2..7c26ee70c8b0 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -146,6 +146,16 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } +bool +cifs_chan_is_iface_active(struct cifs_ses *ses, + struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + + return ses->chans[chan_index].iface && + ses->chans[chan_index].iface->is_active; +} + /* returns number of channels added */ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { @@ -244,6 +254,75 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) return new_chan_count - old_chan_count; } +/* + * update the iface for the channel if necessary. + * will return 0 when iface is updated. 1 otherwise + * Must be called with chan_lock held. + */ +int +cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) +{ + unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + struct cifs_server_iface *iface = NULL; + struct cifs_server_iface *old_iface = NULL; + int rc = 0; + + /* primary channel. This can never go away */ + if (!chan_index) + return 0; + + if (ses->chans[chan_index].iface) { + old_iface = ses->chans[chan_index].iface; + if (old_iface->is_active) + return 1; + } + + spin_lock(&ses->iface_lock); + + /* then look for a new one */ + list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!iface->is_active || + (is_ses_using_iface(ses, iface) && + !iface->rss_capable)) { + continue; + } + kref_get(&iface->refcount); + } + + if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + rc = 1; + iface = NULL; + cifs_dbg(FYI, "unable to find a suitable iface\n"); + } + + ses->chans[chan_index].iface = iface; + + /* now drop the ref to the current iface */ + if (old_iface && iface) { + kref_put(&old_iface->refcount, release_iface); + cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", + &old_iface->sockaddr, + &iface->sockaddr); + } else if (old_iface) { + kref_put(&old_iface->refcount, release_iface); + cifs_dbg(FYI, "releasing ref to iface: %pIS\n", + &old_iface->sockaddr); + } else { + WARN_ON(!iface); + cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); + } + + spin_unlock(&ses->iface_lock); + + /* No iface is found. if secondary chan, drop connection */ + if (!iface && CIFS_SERVER_IS_CHAN(server)) { + cifs_put_tcp_session(server, false); + ses->chans[chan_index].server = NULL; + } + + return rc; +} + /* * If server is a channel of ses, return the corresponding enclosing * cifs_chan otherwise return NULL. -- cgit v1.2.3 From 6e1c1c08cdf3d7d7b8c571c0f032a283af6ca024 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 6 Jun 2022 09:17:56 +0000 Subject: cifs: periodically query network interfaces from server Currently, we only query the server for network interfaces information at the time of mount, and never afterwards. This can be a problem, especially for services like Azure, where the IP address of the channel endpoints can change over time. With this change, we schedule a 600s polling of this info from the server for each tree connect. An alternative for periodic polling was to do this only at the time of reconnect. But this could delay the reconnect time slightly. Also, there are some challenges w.r.t how we have cifs_reconnect implemented today. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/connect.c | 28 ++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 2 +- 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7fc2addc95a3..a643c84ff1e9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -80,6 +80,9 @@ #define SMB_DNS_RESOLVE_INTERVAL_MIN 120 #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600 +/* smb multichannel query server interfaces interval in seconds */ +#define SMB_INTERFACE_POLL_INTERVAL 600 + /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 5 @@ -1255,6 +1258,7 @@ struct cifs_tcon { #ifdef CONFIG_CIFS_DFS_UPCALL struct list_head ulist; /* cache update list */ #endif + struct delayed_work query_interfaces; /* query interfaces workqueue job */ }; /* diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 00c4a8aa2649..d59aebefa71c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -641,6 +641,8 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); +int +SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon); void extract_unc_hostname(const char *unc, const char **h, size_t *len); int copy_path_name(char *dst, const char *src); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d8aae257649f..e666d2643ede 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -145,6 +145,25 @@ requeue_resolve: return rc; } +static void smb2_query_server_interfaces(struct work_struct *work) +{ + int rc; + struct cifs_tcon *tcon = container_of(work, + struct cifs_tcon, + query_interfaces.work); + + /* + * query server network interfaces, in case they change + */ + rc = SMB3_request_interfaces(0, tcon); + if (rc) { + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + } + + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); +} static void cifs_resolve_server(struct work_struct *work) { @@ -2276,6 +2295,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) list_del_init(&tcon->tcon_list); spin_unlock(&cifs_tcp_ses_lock); + /* cancel polling of interfaces */ + cancel_delayed_work_sync(&tcon->query_interfaces); + if (tcon->use_witness) { int rc; @@ -2513,6 +2535,12 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); + /* schedule query interfaces poll */ + INIT_DELAYED_WORK(&tcon->query_interfaces, + smb2_query_server_interfaces); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index db4ccf1d235e..8802995b2d3d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -671,7 +671,7 @@ out: return rc; } -static int +int SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) { int rc; -- cgit v1.2.3 From 15b694e96c31807d8515aacfa687a1e8a4fbbadc Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 23 Jun 2022 16:56:44 +0800 Subject: USB: serial: option: add Quectel RM500K module support Add usb product id of the Quectel RM500K module. RM500K provides 2 mandatory interfaces to Linux host after enumeration. - /dev/ttyUSB5: this is a serial interface for control path. User needs to write AT commands to this device node to query status, set APN, set PIN code, and enable/disable the data connection to 5G network. - ethX: this is the data path provided as a RNDIS devices. After the data connection has been established, Linux host can access 5G data network via this interface. "RNDIS": RNDIS + ADB + AT (/dev/ttyUSB5) + MODEM COMs usb-devices output for 0x7001: T: Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=7001 Rev=00.01 S: Manufacturer=MediaTek Inc. S: Product=USB DATA CARD S: SerialNumber=869206050009672 C: #Ifs=10 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=02 Prot=ff Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=125us I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=09(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Co-developed-by: Ballon Shi Signed-off-by: Ballon Shi Signed-off-by: Macpaul Lin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 61d34886c706..de59fa919540 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -257,6 +257,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 +#define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1150,6 +1151,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, -- cgit v1.2.3 From 41f38043f884c66af4114a7109cf540d6222f450 Mon Sep 17 00:00:00 2001 From: Leo Savernik Date: Wed, 22 Jun 2022 12:19:21 +0200 Subject: nvme: add a bogus subsystem NQN quirk for Micron MTFDKBA2T0TFH The Micron MTFDKBA2T0TFH device reports the same subsysem NQN for all devices. Add a quick to ignore it. Signed-off-by: Leo Savernik Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c7012e85d035..1871d216e1f1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3474,6 +3474,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ -- cgit v1.2.3 From 23c9cd56007e90b2c2317c5eab6ab12921b4314a Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Tue, 21 Jun 2022 09:05:00 +0200 Subject: nvme: fix the CRIMS and CRWMS definitions to match the spec Adjust the values of NVME_CAP_CRMS_CRIMS and NVME_CAP_CRMS_CRWMS masks as they are different from the ones in TP4084 - Time-to-ready. Fixes: 354201c53e61 ("nvme: add support for TP4084 - Time-to-Ready Enhancements"). Signed-off-by: Joel Granados Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 29ec3e3481ff..e3934003f239 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -233,8 +233,8 @@ enum { }; enum { - NVME_CAP_CRMS_CRIMS = 1ULL << 59, - NVME_CAP_CRMS_CRWMS = 1ULL << 60, + NVME_CAP_CRMS_CRWMS = 1ULL << 59, + NVME_CAP_CRMS_CRIMS = 1ULL << 60, }; struct nvme_id_power_state { -- cgit v1.2.3 From e6487833182a8a0187f0292aca542fc163ccd03e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jun 2022 10:29:42 +0200 Subject: nvme: move the Samsung X5 quirk entry to the core quirks This device shares the PCI ID with the Samsung 970 Evo Plus that does not need or want the quirks. Move the the quirk entry to the core table based on the model number instead. Fixes: bc360b0b1611 ("nvme-pci: add quirks for Samsung X5 SSDs") Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav --- drivers/nvme/host/core.c | 14 ++++++++++++++ drivers/nvme/host/pci.c | 4 ---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3ab2cfd254a4..b3d9c29aba1e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2546,6 +2546,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x1e0f, .mn = "KCD6XVUL6T40", .quirks = NVME_QUIRK_NO_APST, + }, + { + /* + * The external Samsung X5 SSD fails initialization without a + * delay before checking if it is ready and has a whole set of + * other problems. To make this even more interesting, it + * shares the PCI ID with internal Samsung 970 Evo Plus that + * does not need or want these quirks. + */ + .vid = 0x144d, + .mn = "Samsung Portable SSD X5", + .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, } }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1871d216e1f1..d7b24ee17285 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3526,10 +3526,6 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| - NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; -- cgit v1.2.3 From 3be4562584bba603f33863a00c1c32eecf772ee6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 22 Jun 2022 12:14:24 -0700 Subject: dma-direct: use the correct size for dma_set_encrypted() The third parameter of dma_set_encrypted() is a size in bytes rather than the number of pages. Fixes: 4d0564785bb0 ("dma-direct: factor out dma_set_{de,en}crypted helpers") Signed-off-by: Dexuan Cui Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index e978f36e6be8..8d0b68a17042 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -357,7 +357,7 @@ void dma_direct_free(struct device *dev, size_t size, } else { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); - if (dma_set_encrypted(dev, cpu_addr, 1 << page_order)) + if (dma_set_encrypted(dev, cpu_addr, size)) return; } @@ -392,7 +392,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { - unsigned int page_order = get_order(size); void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -400,7 +399,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (dma_set_encrypted(dev, vaddr, 1 << page_order)) + if (dma_set_encrypted(dev, vaddr, size)) return; __dma_direct_free_pages(dev, page, size); } -- cgit v1.2.3 From dbe97cff7dd9f0f75c524afdd55ad46be3d15295 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Tue, 21 Jun 2022 22:27:26 -0400 Subject: xen/gntdev: Avoid blocking in unmap_grant_pages() unmap_grant_pages() currently waits for the pages to no longer be used. In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a deadlock against i915: i915 was waiting for gntdev's MMU notifier to finish, while gntdev was waiting for i915 to free its pages. I also believe this is responsible for various deadlocks I have experienced in the past. Avoid these problems by making unmap_grant_pages async. This requires making it return void, as any errors will not be available when the function returns. Fortunately, the only use of the return value is a WARN_ON(), which can be replaced by a WARN_ON when the error is detected. Additionally, a failed call will not prevent further calls from being made, but this is harmless. Because unmap_grant_pages is now async, the grant handle will be sent to INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same handle. Instead, a separate bool array is allocated for this purpose. This wastes memory, but stuffing this information in padding bytes is too fragile. Furthermore, it is necessary to grab a reference to the map before making the asynchronous call, and release the reference when the call returns. It is also necessary to guard against reentrancy in gntdev_map_put(), and to handle the case where userspace tries to map a mapping whose contents have not all been freed yet. Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use") Cc: stable@vger.kernel.org Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev-common.h | 7 ++ drivers/xen/gntdev.c | 157 ++++++++++++++++++++++++++++++-------------- 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 20d7d059dadb..40ef379c28ab 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -16,6 +16,7 @@ #include #include #include +#include struct gntdev_dmabuf_priv; @@ -56,6 +57,7 @@ struct gntdev_grant_map { struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; + bool *being_removed; struct page **pages; unsigned long pages_vm_start; @@ -73,6 +75,11 @@ struct gntdev_grant_map { /* Needed to avoid allocation in gnttab_dma_free_pages(). */ xen_pfn_t *frames; #endif + + /* Number of live grants */ + atomic_t live_grants; + /* Needed to avoid allocation in __unmap_grant_pages */ + struct gntab_unmap_queue_data unmap_data; }; struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 59ffea800079..4b56c39f766d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -60,10 +61,11 @@ module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by one mapping request"); +/* True in PV mode, false otherwise */ static int use_ptemod; -static int unmap_grant_pages(struct gntdev_grant_map *map, - int offset, int pages); +static void unmap_grant_pages(struct gntdev_grant_map *map, + int offset, int pages); static struct miscdevice gntdev_miscdev; @@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) kvfree(map->unmap_ops); kvfree(map->kmap_ops); kvfree(map->kunmap_ops); + kvfree(map->being_removed); kfree(map); } @@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->being_removed = + kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || - NULL == add->pages) + NULL == add->pages || + NULL == add->being_removed) goto err; if (use_ptemod) { add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), @@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - if (map->pages && !use_ptemod) + if (map->pages && !use_ptemod) { + /* + * Increment the reference count. This ensures that the + * subsequent call to unmap_grant_pages() will not wind up + * re-entering itself. It *can* wind up calling + * gntdev_put_map() recursively, but such calls will be with a + * reference count greater than 1, so they will return before + * this code is reached. The recursion depth is thus limited to + * 1. Do NOT use refcount_inc() here, as it will detect that + * the reference count is zero and WARN(). + */ + refcount_set(&map->users, 1); + + /* + * Unmap the grants. This may or may not be asynchronous, so it + * is possible that the reference count is 1 on return, but it + * could also be greater than 1. + */ unmap_grant_pages(map, 0, map->count); + /* Check if the memory now needs to be freed */ + if (!refcount_dec_and_test(&map->users)) + return; + + /* + * All pages have been returned to the hypervisor, so free the + * map. + */ + } + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) int gntdev_map_grant_pages(struct gntdev_grant_map *map) { + size_t alloced = 0; int i, err = 0; if (!use_ptemod) { @@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->count); for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status == GNTST_okay) + if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - else if (!err) + if (!use_ptemod) + alloced++; + } else if (!err) err = -EINVAL; if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; if (use_ptemod) { - if (map->kmap_ops[i].status == GNTST_okay) + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; - else if (!err) + } else if (!err) err = -EINVAL; } } + atomic_add(alloced, &map->live_grants); return err; } -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages_done(int result, + struct gntab_unmap_queue_data *data) { - int i, err = 0; - struct gntab_unmap_queue_data unmap_data; - - if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { - int pgno = (map->notify.addr >> PAGE_SHIFT); - if (pgno >= offset && pgno < offset + pages) { - /* No need for kmap, pages are in lowmem */ - uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); - tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; - } - } - - unmap_data.unmap_ops = map->unmap_ops + offset; - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; - unmap_data.pages = map->pages + offset; - unmap_data.count = pages; - - err = gnttab_unmap_refs_sync(&unmap_data); - if (err) - return err; + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; - for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) - err = -EINVAL; + for (i = 0; i < data->count; i++) { + WARN_ON(map->unmap_ops[offset+i].status); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - if (map->kunmap_ops[offset+i].status) - err = -EINVAL; + WARN_ON(map->kunmap_ops[offset+i].status); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } } - return err; + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ + atomic_sub(data->count, &map->live_grants); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map); +} + +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) +{ + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { + int pgno = (map->notify.addr >> PAGE_SHIFT); + + if (pgno >= offset && pgno < offset + pages) { + /* No need for kmap, pages are in lowmem */ + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); + + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; + } + } + + map->unmap_data.unmap_ops = map->unmap_ops + offset; + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.pages = map->pages + offset; + map->unmap_data.count = pages; + map->unmap_data.done = __unmap_grant_pages_done; + map->unmap_data.data = map; + refcount_inc(&map->users); /* to keep map alive during async call below */ + + gnttab_unmap_refs_async(&map->unmap_data); } -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) { - int range, err = 0; + int range; + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); /* It is possible the requested range will have a "hole" where we * already unmapped some of the grants. Only unmap valid ranges. */ - while (pages && !err) { - while (pages && - map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { + while (pages) { + while (pages && map->being_removed[offset]) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset + range].handle == - INVALID_GRANT_HANDLE) + if (map->being_removed[offset + range]) break; + map->being_removed[offset + range] = true; range++; } - err = __unmap_grant_pages(map, offset, range); + if (range) + __unmap_grant_pages(map, offset, range); offset += range; pages -= range; } - - return err; } /* ------------------------------------------------------------------ */ @@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; - int err; if (!mmu_notifier_range_blockable(range)) return false; @@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, map->index, map->count, map->vma->vm_start, map->vma->vm_end, range->start, range->end, mstart, mend); - err = unmap_grant_pages(map, + unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); return true; } @@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; if (use_ptemod && map->vma) goto unlock_out; + if (atomic_read(&map->live_grants)) { + err = -EAGAIN; + goto unlock_out; + } refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; -- cgit v1.2.3 From ca2a3343d69741dae4df2dbb954fb806d9a835de Mon Sep 17 00:00:00 2001 From: Li Nan Date: Thu, 23 Jun 2022 15:41:00 +0800 Subject: block: remove WARN_ON() from bd_link_disk_holder Since commit 83cbce957446("block: add error handling for device_add_disk / add_disk"), bdev->bd_holder_dir can not be empty now, so remove WARN_ON() from bd_link_disk_holder. Signed-off-by: Li Nan Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074100.2251301-1-linan122@huawei.com Signed-off-by: Jens Axboe --- block/holder.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/holder.c b/block/holder.c index 8d750281a1cd..5283bc804cc1 100644 --- a/block/holder.c +++ b/block/holder.c @@ -79,10 +79,6 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) WARN_ON_ONCE(!bdev->bd_holder); - /* FIXME: remove the following once add_disk() handles errors */ - if (WARN_ON(!bdev->bd_holder_dir)) - goto out_unlock; - holder = bd_find_holder_disk(bdev, disk); if (holder) { holder->refcnt++; -- cgit v1.2.3 From 9e2f6498efbbc880d7caa7935839e682b64fe5a6 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 15 Jun 2022 18:57:06 +0000 Subject: selftests: KVM: Handle compiler optimizations in ucall The selftests, when built with newer versions of clang, is found to have over optimized guests' ucall() function, and eliminating the stores for uc.cmd (perhaps due to no immediate readers). This resulted in the userspace side always reading a value of '0', and causing multiple test failures. As a result, prevent the compiler from optimizing the stores in ucall() with WRITE_ONCE(). Suggested-by: Ricardo Koller Suggested-by: Reiji Watanabe Signed-off-by: Raghavendra Rao Ananta Message-Id: <20220615185706.1099208-1-rananta@google.com> Reviewed-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index e0b0164e9af8..be1d9728c4ce 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; + WRITE_ONCE(uc.cmd, cmd); nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) -- cgit v1.2.3 From a808925075fb750804a60ff0710614466c396db4 Mon Sep 17 00:00:00 2001 From: Hongyu Xie Date: Thu, 23 Jun 2022 14:19:42 +0300 Subject: xhci: Keep interrupt disabled in initialization until host is running. irq is disabled in xhci_quiesce(called by xhci_halt, with bit:2 cleared in USBCMD register), but xhci_run(called by usb_add_hcd) re-enable it. It's possible that you will receive thousands of interrupt requests after initialization for 2.0 roothub. And you will get a lot of warning like, "xHCI dying, ignoring interrupt. Shouldn't IRQs be disabled?". This amount of interrupt requests will cause the entire system to freeze. This problem was first found on a device with ASM2142 host controller on it. [tidy up old code while moving it, reword header -Mathias] Cc: stable@kernel.org Signed-off-by: Hongyu Xie Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 9ac56e9ffc64..cb99bed5f755 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -611,15 +611,37 @@ static int xhci_init(struct usb_hcd *hcd) static int xhci_run_finished(struct xhci_hcd *xhci) { + unsigned long flags; + u32 temp; + + /* + * Enable interrupts before starting the host (xhci 4.2 and 5.5.2). + * Protect the short window before host is running with a lock + */ + spin_lock_irqsave(&xhci->lock, flags); + + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable interrupts"); + temp = readl(&xhci->op_regs->command); + temp |= (CMD_EIE); + writel(temp, &xhci->op_regs->command); + + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable primary interrupter"); + temp = readl(&xhci->ir_set->irq_pending); + writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending); + if (xhci_start(xhci)) { xhci_halt(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); return -ENODEV; } + xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; if (xhci->quirks & XHCI_NEC_HOST) xhci_ring_cmd_db(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); + return 0; } @@ -668,19 +690,6 @@ int xhci_run(struct usb_hcd *hcd) temp |= (xhci->imod_interval / 250) & ER_IRQ_INTERVAL_MASK; writel(temp, &xhci->ir_set->irq_control); - /* Set the HCD state before we enable the irqs */ - temp = readl(&xhci->op_regs->command); - temp |= (CMD_EIE); - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Enable interrupts, cmd = 0x%x.", temp); - writel(temp, &xhci->op_regs->command); - - temp = readl(&xhci->ir_set->irq_pending); - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Enabling event ring interrupter %p by writing 0x%x to irq_pending", - xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp)); - writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending); - if (xhci->quirks & XHCI_NEC_HOST) { struct xhci_command *command; -- cgit v1.2.3 From 83810f84ecf11dfc5a9414a8b762c3501b328185 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 23 Jun 2022 14:19:43 +0300 Subject: xhci: turn off port power in shutdown If ports are not turned off in shutdown then runtime suspended self-powered USB devices may survive in U3 link state over S5. During subsequent boot, if firmware sends an IPC command to program the port in DISCONNECT state, it will time out, causing significant delay in the boot time. Turning off roothub port power is also recommended in xhci specification 4.19.4 "Port Power" in the additional note. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci.c | 15 +++++++++++++-- drivers/usb/host/xhci.h | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c54f2bc23d3f..0fdc014c9401 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -652,7 +652,7 @@ struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd) * It will release and re-aquire the lock while calling ACPI * method. */ -static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, bool on, unsigned long *flags) __must_hold(&xhci->lock) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index cb99bed5f755..65858f607437 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -791,6 +791,8 @@ static void xhci_stop(struct usb_hcd *hcd) void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + unsigned long flags; + int i; if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); @@ -806,12 +808,21 @@ void xhci_shutdown(struct usb_hcd *hcd) del_timer_sync(&xhci->shared_hcd->rh_timer); } - spin_lock_irq(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); xhci_halt(xhci); + + /* Power off USB2 ports*/ + for (i = 0; i < xhci->usb2_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags); + + /* Power off USB3 ports*/ + for (i = 0; i < xhci->usb3_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags); + /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) xhci_reset(xhci, XHCI_RESET_SHORT_USEC); - spin_unlock_irq(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); xhci_cleanup_msix(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0bd76c94a4b1..28aaf031f9a8 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2196,6 +2196,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd); +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, + bool on, unsigned long *flags); void xhci_hc_died(struct xhci_hcd *xhci); -- cgit v1.2.3 From 7516da47a349e74de623243a27f9b8a91446bf4f Mon Sep 17 00:00:00 2001 From: Tanveer Alam Date: Thu, 23 Jun 2022 14:19:44 +0300 Subject: xhci-pci: Allow host runtime PM as default for Intel Raptor Lake xHCI In the same way as Intel Alder Lake TCSS (Type-C Subsystem) the Raptor Lake TCSS xHCI needs to be runtime suspended whenever possible to allow the TCSS hardware block to enter D3cold and thus save energy. Cc: stable@kernel.org Signed-off-by: Tanveer Alam Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index fac9492a8bda..d66ea276ccec 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -61,6 +61,7 @@ #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed +#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -269,7 +270,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && -- cgit v1.2.3 From 8ffdc53a60049f3930afe161dc51c67959c8d83d Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Thu, 23 Jun 2022 14:19:45 +0300 Subject: xhci-pci: Allow host runtime PM as default for Intel Meteor Lake xHCI Meteor Lake TCSS(Type-C Subsystem) xHCI needs to be runtime suspended whenever possible to allow the TCSS hardware block to enter D3cold and thus save energy. Cc: stable@kernel.org Signed-off-by: Utkarsh Patel Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220623111945.1557702-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index d66ea276ccec..dce6c0ec8d34 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -62,6 +62,7 @@ #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e +#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -271,7 +272,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && -- cgit v1.2.3 From 9ca766eaea2e87b8b773bff04ee56c055cb76d4e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 11:29:48 +0300 Subject: gpio: winbond: Fix error code in winbond_gpio_get() This error path returns 1, but it should instead propagate the negative error code from winbond_sio_enter(). Fixes: a0d65009411c ("gpio: winbond: Add driver") Signed-off-by: Dan Carpenter Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-winbond.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 7f8f5b02e31d..4b61d975cc0e 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -385,12 +385,13 @@ static int winbond_gpio_get(struct gpio_chip *gc, unsigned int offset) unsigned long *base = gpiochip_get_data(gc); const struct winbond_gpio_info *info; bool val; + int ret; winbond_gpio_get_info(&offset, &info); - val = winbond_sio_enter(*base); - if (val) - return val; + ret = winbond_sio_enter(*base); + if (ret) + return ret; winbond_sio_select_logical(*base, info->dev); -- cgit v1.2.3 From e70b64a3f28b9f54602ae3e706b1dc1338de3df7 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 23 Jun 2022 01:37:43 -0700 Subject: io_uring: move io_uring_get_opcode out of TP_printk The TP_printk macro's are not supposed to use custom code ([1]) or else tools such as perf cannot use these events. Convert the opcode string representation to use the __string wiring that the event framework provides ([2]). [1]: https://lwn.net/Articles/379903/ [2]: https://lwn.net/Articles/381064/ Fixes: 033b87d24f72 ("io_uring: use the text representation of ops in trace") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220623083743.2648321-1-dylany@fb.com [axboe: fixup spurious removal of sq_thread assignment] Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 42 ++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 66fcc5a1a5b1..aa2f951b07cd 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -158,6 +158,8 @@ TRACE_EVENT(io_uring_queue_async_work, __field( unsigned int, flags ) __field( struct io_wq_work *, work ) __field( int, rw ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -168,11 +170,13 @@ TRACE_EVENT(io_uring_queue_async_work, __entry->opcode = opcode; __entry->work = work; __entry->rw = rw; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); @@ -198,6 +202,8 @@ TRACE_EVENT(io_uring_defer, __field( void *, req ) __field( unsigned long long, data ) __field( u8, opcode ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -205,11 +211,13 @@ TRACE_EVENT(io_uring_defer, __entry->req = req; __entry->data = user_data; __entry->opcode = opcode; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s", __entry->ctx, __entry->req, __entry->data, - io_uring_get_opcode(__entry->opcode)) + __get_str(op_str)) ); /** @@ -298,6 +306,8 @@ TRACE_EVENT(io_uring_fail_link, __field( unsigned long long, user_data ) __field( u8, opcode ) __field( void *, link ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -306,11 +316,13 @@ TRACE_EVENT(io_uring_fail_link, __entry->user_data = user_data; __entry->opcode = opcode; __entry->link = link; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), __entry->link) + __get_str(op_str), __entry->link) ); /** @@ -390,6 +402,8 @@ TRACE_EVENT(io_uring_submit_sqe, __field( u32, flags ) __field( bool, force_nonblock ) __field( bool, sq_thread ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -400,11 +414,13 @@ TRACE_EVENT(io_uring_submit_sqe, __entry->flags = flags; __entry->force_nonblock = force_nonblock; __entry->sq_thread = sq_thread; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " "non block %d, sq_thread %d", __entry->ctx, __entry->req, - __entry->user_data, io_uring_get_opcode(__entry->opcode), + __entry->user_data, __get_str(op_str), __entry->flags, __entry->force_nonblock, __entry->sq_thread) ); @@ -435,6 +451,8 @@ TRACE_EVENT(io_uring_poll_arm, __field( u8, opcode ) __field( int, mask ) __field( int, events ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -444,11 +462,13 @@ TRACE_EVENT(io_uring_poll_arm, __entry->opcode = opcode; __entry->mask = mask; __entry->events = events; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->mask, __entry->events) ); @@ -474,6 +494,8 @@ TRACE_EVENT(io_uring_task_add, __field( unsigned long long, user_data ) __field( u8, opcode ) __field( int, mask ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -482,11 +504,13 @@ TRACE_EVENT(io_uring_task_add, __entry->user_data = user_data; __entry->opcode = opcode; __entry->mask = mask; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->mask) ); @@ -523,6 +547,8 @@ TRACE_EVENT(io_uring_req_failed, __field( u64, pad1 ) __field( u64, addr3 ) __field( int, error ) + + __string( op_str, io_uring_get_opcode(sqe->opcode) ) ), TP_fast_assign( @@ -542,6 +568,8 @@ TRACE_EVENT(io_uring_req_failed, __entry->pad1 = sqe->__pad2[0]; __entry->addr3 = sqe->addr3; __entry->error = error; + + __assign_str(op_str, io_uring_get_opcode(sqe->opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, " @@ -550,7 +578,7 @@ TRACE_EVENT(io_uring_req_failed, "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, " "error=%d", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->flags, __entry->ioprio, (unsigned long long)__entry->off, (unsigned long long) __entry->addr, __entry->len, -- cgit v1.2.3 From c1c2a15c2b5379ea8e44dcdcc298e3de42076ba0 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 21 Jun 2022 08:40:36 +0200 Subject: gpio: grgpio: Fix device removing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a platform device's remove callback returns non-zero, the device core emits a warning and still removes the device and calls the devm cleanup callbacks. So it's not save to not unregister the gpiochip because on the next request to a GPIO the driver accesses kfree()'d memory. Also if an IRQ triggers, the freed memory is accessed. Instead rely on the GPIO framework to ensure that after gpiochip_remove() all GPIOs are freed and so the corresponding IRQs are unmapped. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-grgpio.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index df563616f943..bea0e32c195d 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -434,25 +434,13 @@ static int grgpio_probe(struct platform_device *ofdev) static int grgpio_remove(struct platform_device *ofdev) { struct grgpio_priv *priv = platform_get_drvdata(ofdev); - int i; - int ret = 0; - - if (priv->domain) { - for (i = 0; i < GRGPIO_MAX_NGPIO; i++) { - if (priv->uirqs[i].refcnt != 0) { - ret = -EBUSY; - goto out; - } - } - } gpiochip_remove(&priv->gc); if (priv->domain) irq_domain_remove(priv->domain); -out: - return ret; + return 0; } static const struct of_device_id grgpio_match[] = { -- cgit v1.2.3 From cc02e6e21aa5f2ac0defe8c15e5a9d024da6e73d Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 10 Jun 2022 11:04:36 +0200 Subject: s390/crash: add missing iterator advance in copy_oldmem_page() In case old memory was successfully copied the passed iterator should be advanced as well. Currently copy_oldmem_page() is always called with single-segment iterator. Should that ever change - copy_oldmem_user and copy_oldmem_kernel() functions would need a rework to deal with multi-segment iterators. Fixes: 5d8de293c224 ("vmcore: convert copy_oldmem_page() to take an iov_iter") Reviewed-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Signed-off-by: Alexander Gordeev --- arch/s390/kernel/crash_dump.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a2c1c55daec0..2534a31d2550 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -219,6 +219,11 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, unsigned long src; int rc; + if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter))) + return -EINVAL; + /* Multi-segment iterators are not supported */ + if (iter->nr_segs > 1) + return -EINVAL; if (!csize) return 0; src = pfn_to_phys(pfn) + offset; @@ -228,6 +233,8 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, rc = copy_oldmem_user(iter->iov->iov_base, src, csize); else rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); + if (!rc) + iov_iter_advance(iter, csize); return rc; } -- cgit v1.2.3 From af2debd58bd769e38f538143f0d332e15d753396 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 9 Jun 2022 21:32:39 +0200 Subject: s390/crash: make copy_oldmem_page() return number of bytes copied Callback copy_oldmem_page() returns either error code or zero. Instead, it should return the error code or number of bytes copied. Fixes: df9694c7975f ("s390/dump: streamline oldmem copy functions") Reviewed-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Signed-off-by: Alexander Gordeev --- arch/s390/kernel/crash_dump.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 2534a31d2550..28124d0fa1d5 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -233,9 +233,10 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, rc = copy_oldmem_user(iter->iov->iov_base, src, csize); else rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); - if (!rc) - iov_iter_advance(iter, csize); - return rc; + if (rc < 0) + return rc; + iov_iter_advance(iter, csize); + return csize; } /* -- cgit v1.2.3 From be857b7f77d130dbbd47c91fc35198b040f35865 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 10 Jun 2022 15:19:00 +0200 Subject: s390/cpumf: Handle events cycles and instructions identical Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different perf_event attribute::type values: - PERF_TYPE_HARDWARE: when invoked via perf tool predefined events name cycles or cpu-cycles or instructions. - pmu->type: when invoked via perf tool event name cpu_cf/CPU_CYLCES/ or cpu_cf/INSTRUCTIONS/. This invocation also selects the PMU to which the event belongs. Handle both type of invocations identical for events CPU_CYLCES and INSTRUCTIONS. They address the same hardware. The result is different when event modifier exclude_kernel is also set. Invocation with event modifier for user space event counting fails. Output before: # perf stat -e cpum_cf/cpu_cycles/u -- true Performance counter stats for 'true': cpum_cf/cpu_cycles/u 0.000761033 seconds time elapsed 0.000076000 seconds user 0.000725000 seconds sys # Output after: # perf stat -e cpum_cf/cpu_cycles/u -- true Performance counter stats for 'true': 349,613 cpum_cf/cpu_cycles/u 0.000844143 seconds time elapsed 0.000079000 seconds user 0.000800000 seconds sys # Fixes: 6a82e23f45fe ("s390/cpumf: Adjust registration of s390 PMU device drivers") Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar [agordeev@linux.ibm.com corrected commit ID of Fixes commit] Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_cpum_cf.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 483ab5e10164..f7dd3c849e68 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; -- cgit v1.2.3 From 541a496644512ebed429b33f19a3fadfb19c6ee7 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Jun 2022 12:40:46 +0200 Subject: s390/pai: Prevent invalid event number for pai_crypto PMU The pai_crypto PMU has to check the event number. It has to be in the supported range. This is not the case, the lower limit is not checked. Fix this and obey the lower limit. Fixes: 39d62336f5c1 ("s390/pai: add support for cryptography counters") Signed-off-by: Thomas Richter Suggested-by: Sumanth Korikkar Reviewed-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 8c1545946d85..188c6cf09c7a 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -193,8 +193,9 @@ static int paicrypt_event_init(struct perf_event *event) /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) return -ENOENT; - /* PAI crypto event must be valid */ - if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt) + /* PAI crypto event must be in valid range */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; /* Allow only CPU wide operation, no process context for now. */ if (event->hw.target || event->cpu == -1) -- cgit v1.2.3 From 21e876448792af2dd5261338907c72bdf37fa056 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 15 Jun 2022 14:02:07 +0200 Subject: s390/pai: Fix multiple concurrent event installation Two different events such as pai_crypto/KM_AES_128/ and pai_crypto/KM_AES_192/ can be installed multiple times on the same CPU and the events are executed concurrently: # perf stat -e pai_crypto/KM_AES_128/ -C0 -a -- sleep 5 & # sleep 2 # perf stat -e pai_crypto/KM_AES_192/ -C0 -a -- true This results in the first event being installed two times with two seconds delay. The kernel does install the second event after the first event has been deleted and re-added, as can be seen in the traces: 13:48:47.600350 paicrypt_start event 0x1007 (event KM_AES_128) 13:48:49.599359 paicrypt_stop event 0x1007 (event KM_AES_128) 13:48:49.599198 paicrypt_start event 0x1007 13:48:49.599199 paicrypt_start event 0x1008 13:48:49.599921 paicrypt_event_destroy event 0x1008 13:48:52.601507 paicrypt_event_destroy event 0x1007 This is caused by functions event_sched_in() and event_sched_out() which call the PMU's add() and start() functions on schedule_in and the PMU's stop() and del() functions on schedule_out. This is correct for events attached to processes. The pai_crypto events are system-wide events and not attached to processes. Since the kernel common code can not be changed easily, fix this issue and do not reset the event count value to zero each time the event is added and started. Instead use a flag and zero the event count value only when called immediately after the event has been initialized. Therefore only the first invocation of the the event's add() function initializes the event count value to zero. The following invocations of the event's add() function leave the current event count value untouched. Fixes: 39d62336f5c1 ("s390/pai: add support for cryptography counters") Reported-by: Sumanth Korikkar Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 188c6cf09c7a..b38b4ae01589 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -209,6 +209,12 @@ static int paicrypt_event_init(struct perf_event *event) if (rc) return rc; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ + event->hw.last_tag = 0; cpump->event = event; event->destroy = paicrypt_event_destroy; @@ -243,9 +249,12 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - local64_set(&event->count, 0); + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->count, 0); + local64_set(&event->hw.prev_count, sum); + } } static int paicrypt_add(struct perf_event *event, int flags) -- cgit v1.2.3 From b653db77350c7307a513b81856fe53e94cf42446 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 19 Jun 2022 10:37:32 -0400 Subject: mm: Clear page->private when splitting or migrating a page In our efforts to remove uses of PG_private, we have found folios with the private flag clear and folio->private not-NULL. That is the root cause behind 642d51fb0775 ("ceph: check folio PG_private bit instead of folio->private"). It can also affect a few other filesystems that haven't yet reported a problem. compaction_alloc() can return a page with uninitialised page->private, and rather than checking all the callers of migrate_pages(), just zero page->private after calling get_new_page(). Similarly, the tail pages from split_huge_page() may also have an uninitialised page->private. Reported-by: Xiubo Li Tested-by: Xiubo Li Signed-off-by: Matthew Wilcox (Oracle) --- mm/huge_memory.c | 1 + mm/migrate.c | 1 + 2 files changed, 2 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f7248002dad9..834f288b3769 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2377,6 +2377,7 @@ static void __split_huge_page_tail(struct page *head, int tail, page_tail); page_tail->mapping = head->mapping; page_tail->index = head->index + tail; + page_tail->private = 0; /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); diff --git a/mm/migrate.c b/mm/migrate.c index e51588e95f57..6c1ea61f39d8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1106,6 +1106,7 @@ static int unmap_and_move(new_page_t get_new_page, if (!newpage) return -ENOMEM; + newpage->private = 0; rc = __unmap_and_move(page, newpage, force, mode); if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(newpage, reason); -- cgit v1.2.3 From 00fa15e0d56482e32d8ca1f51d76b0ee00afb16b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 20 Jun 2022 19:05:36 +1000 Subject: filemap: Fix serialization adding transparent huge pages to page cache Commit 793917d997df ("mm/readahead: Add large folio readahead") introduced support for using large folios for filebacked pages if the filesystem supports it. page_cache_ra_order() was introduced to allocate and add these large folios to the page cache. However adding pages to the page cache should be serialized against truncation and hole punching by taking invalidate_lock. Not doing so can lead to data races resulting in stale data getting added to the page cache and marked up-to-date. See commit 730633f0b7f9 ("mm: Protect operations adding pages to page cache with invalidate_lock") for more details. This issue was found by inspection but a testcase revealed it was possible to observe in practice on XFS. Fix this by taking invalidate_lock in page_cache_ra_order(), to mirror what is done for the non-thp case in page_cache_ra_unbounded(). Signed-off-by: Alistair Popple Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- mm/readahead.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/readahead.c b/mm/readahead.c index 57a015108254..fdcd28cbd92d 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -510,6 +510,7 @@ void page_cache_ra_order(struct readahead_control *ractl, new_order--; } + filemap_invalidate_lock_shared(mapping); while (index <= limit) { unsigned int order = new_order; @@ -536,6 +537,7 @@ void page_cache_ra_order(struct readahead_control *ractl, } read_pages(ractl); + filemap_invalidate_unlock_shared(mapping); /* * If there were already pages in the page cache, then we may have -- cgit v1.2.3 From 20fb0c8272bbb102d15bdd11aa64f828619dd7cc Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:52 +0200 Subject: Revert "printk: Wait for the global console lock when the system is going down" This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-2-pmladek@suse.com --- include/linux/printk.h | 5 ----- kernel/panic.c | 2 -- kernel/printk/internal.h | 2 -- kernel/printk/printk.c | 4 ---- kernel/printk/printk_safe.c | 32 -------------------------------- kernel/reboot.c | 2 -- 6 files changed, 47 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index c1e07c0652c7..cd26aab0ab2a 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -174,7 +174,6 @@ extern void printk_prefer_direct_enter(void); extern void printk_prefer_direct_exit(void); extern bool pr_flush(int timeout_ms, bool reset_on_progress); -extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -239,10 +238,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) return true; } -static inline void try_block_console_kthreads(int timeout_ms) -{ -} - static inline int printk_ratelimit(void) { return 0; diff --git a/kernel/panic.c b/kernel/panic.c index fe73d18ecdf0..6737b2332275 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -273,7 +273,6 @@ void panic(const char *fmt, ...) * unfortunately means it may not be hardened to work in a * panic situation. */ - try_block_console_kthreads(10000); smp_send_stop(); } else { /* @@ -281,7 +280,6 @@ void panic(const char *fmt, ...) * kmsg_dump, we will need architecture dependent extra * works in addition to stopping other CPUs. */ - try_block_console_kthreads(10000); crash_smp_send_stop(); } diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index e7d8578860ad..d947ca6c84f9 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -20,8 +20,6 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; -extern bool block_console_kthreads; - __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b095fb5f5f61..45c6c2b0b104 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -250,9 +250,6 @@ static atomic_t console_kthreads_active = ATOMIC_INIT(0); #define console_kthread_printing_exit() \ atomic_dec(&console_kthreads_active) -/* Block console kthreads to avoid processing new messages. */ -bool block_console_kthreads; - /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -3733,7 +3730,6 @@ static bool printer_should_wake(struct console *con, u64 seq) if (con->blocked || console_kthreads_atomically_blocked() || - block_console_kthreads || system_state > SYSTEM_RUNNING || oops_in_progress) { return false; diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index caac4de1ea59..ef0f9a2044da 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include "internal.h" @@ -52,33 +50,3 @@ asmlinkage int vprintk(const char *fmt, va_list args) return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); - -/** - * try_block_console_kthreads() - Try to block console kthreads and - * make the global console_lock() avaialble - * - * @timeout_ms: The maximum time (in ms) to wait. - * - * Prevent console kthreads from starting processing new messages. Wait - * until the global console_lock() become available. - * - * Context: Can be called in any context. - */ -void try_block_console_kthreads(int timeout_ms) -{ - block_console_kthreads = true; - - /* Do not wait when the console lock could not be safely taken. */ - if (this_cpu_read(printk_context) || in_nmi()) - return; - - while (timeout_ms > 0) { - if (console_trylock()) { - console_unlock(); - return; - } - - udelay(1000); - timeout_ms -= 1; - } -} diff --git a/kernel/reboot.c b/kernel/reboot.c index 310363685502..4177645e74d6 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -74,7 +74,6 @@ void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; - try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -263,7 +262,6 @@ static void kernel_shutdown_prepare(enum system_states state) blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); system_state = state; - try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } -- cgit v1.2.3 From 05c96b3713aa2a406d0c4ef0505bf80a6748fedb Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:53 +0200 Subject: Revert "printk: Block console kthreads when direct printing will be required" This reverts commit c3230283e2819a69dad2cf7a63143fde8bab8b5c. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-3-pmladek@suse.com --- kernel/printk/printk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 45c6c2b0b104..ea3dd55709e7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3729,9 +3729,7 @@ static bool printer_should_wake(struct console *con, u64 seq) return true; if (con->blocked || - console_kthreads_atomically_blocked() || - system_state > SYSTEM_RUNNING || - oops_in_progress) { + console_kthreads_atomically_blocked()) { return false; } -- cgit v1.2.3 From 007eeab7e9f03a3108c300c03e11a6c151e430c9 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:54 +0200 Subject: Revert "printk: remove @console_locked" This reverts commit ab406816fca009349b89cbde885daf68a8c77e33. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-4-pmladek@suse.com --- kernel/printk/printk.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ea3dd55709e7..dfd1a19b95d6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -340,7 +340,15 @@ static void console_kthreads_unblock(void) console_kthreads_blocked = false; } -static int console_suspended; +/* + * This is used for debugging the mess that is the VT code by + * keeping track if we have the console semaphore held. It's + * definitely not the perfect debug tool (we don't know if _WE_ + * hold it and are racing, but it helps tracking those weird code + * paths in the console code where we end up in places I want + * locked without the console semaphore held). + */ +static int console_locked, console_suspended; /* * Array of consoles built from command line options (console=) @@ -2711,6 +2719,7 @@ void console_lock(void) if (console_suspended) return; console_kthreads_block(); + console_locked = 1; console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); @@ -2735,26 +2744,15 @@ int console_trylock(void) up_console_sem(); return 0; } + console_locked = 1; console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); -/* - * This is used to help to make sure that certain paths within the VT code are - * running with the console lock held. It is definitely not the perfect debug - * tool (it is not known if the VT code is the task holding the console lock), - * but it helps tracking those weird code paths in the console code such as - * when the console is suspended: where the console is not locked but no - * console printing may occur. - * - * Note: This returns true when the console is suspended but is not locked. - * This is intentional because the VT code must consider that situation - * the same as if the console was locked. - */ int is_console_locked(void) { - return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); + return (console_locked || atomic_read(&console_kthreads_active)); } EXPORT_SYMBOL(is_console_locked); @@ -2810,6 +2808,8 @@ static inline bool console_is_usable(struct console *con) static void __console_unlock(void) { + console_locked = 0; + /* * Depending on whether console_lock() or console_trylock() was used, * appropriately allow the kthread printers to continue. @@ -3127,6 +3127,7 @@ void console_unblank(void) } else console_lock(); + console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) -- cgit v1.2.3 From 2d9ef940f89e0ab4fde7ba6f769d82f2a450c35a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:55 +0200 Subject: Revert "printk: extend console_lock for per-console locking" This reverts commit 8e274732115f63c1d09136284431b3555bd5cc56. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-5-pmladek@suse.com --- include/linux/console.h | 15 --- kernel/printk/printk.c | 261 +++++++++++------------------------------------- 2 files changed, 56 insertions(+), 220 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 143653090c48..9a251e70c090 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,6 @@ #include #include -#include struct vc_data; struct console_font_op; @@ -155,20 +154,6 @@ struct console { u64 seq; unsigned long dropped; struct task_struct *thread; - bool blocked; - - /* - * The per-console lock is used by printing kthreads to synchronize - * this console with callers of console_lock(). This is necessary in - * order to allow printing kthreads to run in parallel to each other, - * while each safely accessing the @blocked field and synchronizing - * against direct printing via console_lock/console_unlock. - * - * Note: For synchronizing against direct printing via - * console_trylock/console_unlock, see the static global - * variable @console_kthreads_active. - */ - struct mutex lock; void *data; struct console *next; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index dfd1a19b95d6..ae489dc685a1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -223,33 +223,6 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; -/* - * Used to synchronize printing kthreads against direct printing via - * console_trylock/console_unlock. - * - * Values: - * -1 = console kthreads atomically blocked (via global trylock) - * 0 = no kthread printing, console not locked (via trylock) - * >0 = kthread(s) actively printing - * - * Note: For synchronizing against direct printing via - * console_lock/console_unlock, see the @lock variable in - * struct console. - */ -static atomic_t console_kthreads_active = ATOMIC_INIT(0); - -#define console_kthreads_atomic_tryblock() \ - (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) -#define console_kthreads_atomic_unblock() \ - atomic_cmpxchg(&console_kthreads_active, -1, 0) -#define console_kthreads_atomically_blocked() \ - (atomic_read(&console_kthreads_active) == -1) - -#define console_kthread_printing_tryenter() \ - atomic_inc_unless_negative(&console_kthreads_active) -#define console_kthread_printing_exit() \ - atomic_dec(&console_kthreads_active) - /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -297,49 +270,6 @@ static bool panic_in_progress(void) return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); } -/* - * Tracks whether kthread printers are all blocked. A value of true implies - * that the console is locked via console_lock() or the console is suspended. - * Writing to this variable requires holding @console_sem. - */ -static bool console_kthreads_blocked; - -/* - * Block all kthread printers from a schedulable context. - * - * Requires holding @console_sem. - */ -static void console_kthreads_block(void) -{ - struct console *con; - - for_each_console(con) { - mutex_lock(&con->lock); - con->blocked = true; - mutex_unlock(&con->lock); - } - - console_kthreads_blocked = true; -} - -/* - * Unblock all kthread printers from a schedulable context. - * - * Requires holding @console_sem. - */ -static void console_kthreads_unblock(void) -{ - struct console *con; - - for_each_console(con) { - mutex_lock(&con->lock); - con->blocked = false; - mutex_unlock(&con->lock); - } - - console_kthreads_blocked = false; -} - /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -2673,6 +2603,13 @@ void resume_console(void) down_console_sem(); console_suspended = 0; console_unlock(); + + /* + * While suspended, new records may have been added to the + * ringbuffer. Wake up the kthread printers to print them. + */ + wake_up_klogd(); + pr_flush(1000, true); } @@ -2691,14 +2628,9 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - else { - /* - * If a new CPU comes online, the conditions for - * printer_should_wake() may have changed for some - * kthread printer with !CON_ANYTIME. - */ - wake_up_klogd(); - } + + /* Wake kthread printers. Some may have become usable. */ + wake_up_klogd(); } return 0; } @@ -2718,7 +2650,6 @@ void console_lock(void) down_console_sem(); if (console_suspended) return; - console_kthreads_block(); console_locked = 1; console_may_schedule = 1; } @@ -2740,10 +2671,6 @@ int console_trylock(void) up_console_sem(); return 0; } - if (!console_kthreads_atomic_tryblock()) { - up_console_sem(); - return 0; - } console_locked = 1; console_may_schedule = 0; return 1; @@ -2752,7 +2679,7 @@ EXPORT_SYMBOL(console_trylock); int is_console_locked(void) { - return (console_locked || atomic_read(&console_kthreads_active)); + return console_locked; } EXPORT_SYMBOL(is_console_locked); @@ -2796,7 +2723,7 @@ static inline bool __console_is_usable(short flags) * Check if the given console is currently capable and allowed to print * records. * - * Requires holding the console_lock. + * Requires the console_lock. */ static inline bool console_is_usable(struct console *con) { @@ -2809,22 +2736,6 @@ static inline bool console_is_usable(struct console *con) static void __console_unlock(void) { console_locked = 0; - - /* - * Depending on whether console_lock() or console_trylock() was used, - * appropriately allow the kthread printers to continue. - */ - if (console_kthreads_blocked) - console_kthreads_unblock(); - else - console_kthreads_atomic_unblock(); - - /* - * New records may have arrived while the console was locked. - * Wake the kthread printers to print them. - */ - wake_up_klogd(); - up_console_sem(); } @@ -2842,19 +2753,17 @@ static void __console_unlock(void) * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. A NULL pointer may be provided - * to disable allowing the console_lock to be taken over by a printk waiter. + * console_lock. Otherwise it is set to false. * * Returns false if the given console has no next record to print, otherwise * true. * - * Requires the console_lock if @handover is non-NULL. - * Requires con->lock otherwise. + * Requires the console_lock. */ -static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool *handover) { - static atomic_t panic_console_dropped = ATOMIC_INIT(0); + static int panic_console_dropped; struct printk_info info; struct printk_record r; unsigned long flags; @@ -2863,8 +2772,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - if (handover) - *handover = false; + *handover = false; if (!prb_read_valid(prb, con->seq, &r)) return false; @@ -2872,8 +2780,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex if (con->seq != r.info->seq) { con->dropped += r.info->seq - con->seq; con->seq = r.info->seq; - if (panic_in_progress() && - atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { + if (panic_in_progress() && panic_console_dropped++ > 10) { suppress_panic_printk = 1; pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); } @@ -2895,61 +2802,31 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } - if (handover) { - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - - /* don't trace irqsoff print latency */ - stop_critical_timings(); - } + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); /* don't trace print latency */ call_console_driver(con, write_text, len, dropped_text); + start_critical_timings(); con->seq++; - if (handover) { - start_critical_timings(); - *handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); - } + *handover = console_lock_spinning_disable_and_check(); + printk_safe_exit_irqrestore(flags); skip: return true; } -/* - * Print a record for a given console, but allow another printk() caller to - * take over the console_lock and continue printing. - * - * Requires the console_lock, but depending on @handover after the call, the - * caller may no longer have the console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) -{ - /* - * Handovers are only supported if threaded printers are atomically - * blocked. The context taking over the console_lock may be atomic. - */ - if (!console_kthreads_atomically_blocked()) { - *handover = false; - handover = NULL; - } - - return __console_emit_next_record(con, text, ext_text, dropped_text, handover); -} - /* * Print out all remaining records to all consoles. * @@ -3001,11 +2878,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove if (con->flags & CON_EXTENDED) { /* Extended consoles do not print "dropped messages". */ - progress = console_emit_next_record_transferable(con, &text[0], - &ext_text[0], NULL, handover); + progress = console_emit_next_record(con, &text[0], + &ext_text[0], NULL, + handover); } else { - progress = console_emit_next_record_transferable(con, &text[0], - NULL, &dropped_text[0], handover); + progress = console_emit_next_record(con, &text[0], + NULL, &dropped_text[0], + handover); } if (*handover) return false; @@ -3120,10 +2999,6 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; - if (!console_kthreads_atomic_tryblock()) { - up_console_sem(); - return; - } } else console_lock(); @@ -3206,6 +3081,10 @@ void console_start(struct console *console) console_lock(); console->flags |= CON_ENABLED; console_unlock(); + + /* Wake the newly enabled kthread printer. */ + wake_up_klogd(); + __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); @@ -3407,8 +3286,6 @@ void register_console(struct console *newcon) newcon->dropped = 0; newcon->thread = NULL; - newcon->blocked = true; - mutex_init(&newcon->lock); if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ @@ -3709,19 +3586,6 @@ static void printk_fallback_preferred_direct(void) console_unlock(); } -/* - * Print a record for a given console, not allowing another printk() caller - * to take over. This is appropriate for contexts that do not have the - * console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text) -{ - return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); -} - static bool printer_should_wake(struct console *con, u64 seq) { short flags; @@ -3729,10 +3593,8 @@ static bool printer_should_wake(struct console *con, u64 seq) if (kthread_should_stop() || !printk_kthreads_available) return true; - if (con->blocked || - console_kthreads_atomically_blocked()) { + if (console_suspended) return false; - } /* * This is an unsafe read from con->flags, but a false positive is @@ -3753,6 +3615,7 @@ static int printk_kthread_func(void *data) struct console *con = data; char *dropped_text = NULL; char *ext_text = NULL; + bool handover; u64 seq = 0; char *text; int error; @@ -3802,27 +3665,15 @@ static int printk_kthread_func(void *data) if (error) continue; - error = mutex_lock_interruptible(&con->lock); - if (error) - continue; + console_lock(); - if (con->blocked || - !console_kthread_printing_tryenter()) { - /* Another context has locked the console_lock. */ - mutex_unlock(&con->lock); + if (console_suspended) { + up_console_sem(); continue; } - /* - * Although this context has not locked the console_lock, it - * is known that the console_lock is not locked and it is not - * possible for any other context to lock the console_lock. - * Therefore it is safe to read con->flags. - */ - - if (!__console_is_usable(con->flags)) { - console_kthread_printing_exit(); - mutex_unlock(&con->lock); + if (!console_is_usable(con)) { + __console_unlock(); continue; } @@ -3835,13 +3686,13 @@ static int printk_kthread_func(void *data) * which can conditionally invoke cond_resched(). */ console_may_schedule = 0; - console_emit_next_record(con, text, ext_text, dropped_text); + console_emit_next_record(con, text, ext_text, dropped_text, &handover); + if (handover) + continue; seq = con->seq; - console_kthread_printing_exit(); - - mutex_unlock(&con->lock); + __console_unlock(); } con_printk(KERN_INFO, con, "printing thread stopped\n"); -- cgit v1.2.3 From 5831788afb17b89c5b531fb60cbd798613ccbb63 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:56 +0200 Subject: Revert "printk: add kthread console printers" This reverts commit 09c5ba0aa2fcfdadb17d045c3ee6f86d69270df7. This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-6-pmladek@suse.com --- include/linux/console.h | 2 - kernel/printk/printk.c | 329 ++++-------------------------------------------- 2 files changed, 22 insertions(+), 309 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 9a251e70c090..8c1686e2c233 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,8 +153,6 @@ struct console { uint ospeed; u64 seq; unsigned long dropped; - struct task_struct *thread; - void *data; struct console *next; }; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ae489dc685a1..5a6273e56b4e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -361,13 +361,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); -/* - * A flag to signify if printk_activate_kthreads() has already started the - * kthread printers. If true, any later registered consoles must start their - * own kthread directly. The flag is write protected by the console_lock. - */ -static bool printk_kthreads_available; - #ifdef CONFIG_PRINTK static atomic_t printk_prefer_direct = ATOMIC_INIT(0); @@ -397,39 +390,6 @@ void printk_prefer_direct_exit(void) WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); } -/* - * Calling printk() always wakes kthread printers so that they can - * flush the new message to their respective consoles. Also, if direct - * printing is allowed, printk() tries to flush the messages directly. - * - * Direct printing is allowed in situations when the kthreads - * are not available or the system is in a problematic state. - * - * See the implementation about possible races. - */ -static inline bool allow_direct_printing(void) -{ - /* - * Checking kthread availability is a possible race because the - * kthread printers can become permanently disabled during runtime. - * However, doing that requires holding the console_lock, so any - * pending messages will be direct printed by console_unlock(). - */ - if (!printk_kthreads_available) - return true; - - /* - * Prefer direct printing when the system is in a problematic state. - * The context that sets this state will always see the updated value. - * The other contexts do not care. Anyway, direct printing is just a - * best effort. The direct output is only possible when console_lock - * is not already taken and no kthread printers are actively printing. - */ - return (system_state > SYSTEM_RUNNING || - oops_in_progress || - atomic_read(&printk_prefer_direct)); -} - DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -2320,10 +2280,10 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched && allow_direct_printing()) { + if (!in_sched) { /* * The caller may be holding system-critical or - * timing-sensitive locks. Disable preemption during direct + * timing-sensitive locks. Disable preemption during * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. @@ -2366,8 +2326,6 @@ EXPORT_SYMBOL(_printk); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); -static void printk_start_kthread(struct console *con); - #else /* CONFIG_PRINTK */ #define CONSOLE_LOG_MAX 0 @@ -2401,8 +2359,6 @@ static void call_console_driver(struct console *con, const char *text, size_t le } static bool suppress_message_printing(int level) { return false; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } -static void printk_start_kthread(struct console *con) { } -static bool allow_direct_printing(void) { return true; } #endif /* CONFIG_PRINTK */ @@ -2603,13 +2559,6 @@ void resume_console(void) down_console_sem(); console_suspended = 0; console_unlock(); - - /* - * While suspended, new records may have been added to the - * ringbuffer. Wake up the kthread printers to print them. - */ - wake_up_klogd(); - pr_flush(1000, true); } @@ -2628,9 +2577,6 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - - /* Wake kthread printers. Some may have become usable. */ - wake_up_klogd(); } return 0; } @@ -2702,9 +2648,18 @@ static bool abandon_console_lock_in_panic(void) return atomic_read(&panic_cpu) != raw_smp_processor_id(); } -static inline bool __console_is_usable(short flags) +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires the console_lock. + */ +static inline bool console_is_usable(struct console *con) { - if (!(flags & CON_ENABLED)) + if (!(con->flags & CON_ENABLED)) + return false; + + if (!con->write) return false; /* @@ -2713,26 +2668,12 @@ static inline bool __console_is_usable(short flags) * cope (CON_ANYTIME) don't call them until this CPU is officially up. */ if (!cpu_online(raw_smp_processor_id()) && - !(flags & CON_ANYTIME)) + !(con->flags & CON_ANYTIME)) return false; return true; } -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_lock. - */ -static inline bool console_is_usable(struct console *con) -{ - if (!con->write) - return false; - - return __console_is_usable(con->flags); -} - static void __console_unlock(void) { console_locked = 0; @@ -2845,8 +2786,8 @@ skip: * were flushed to all usable consoles. A returned false informs the caller * that everything was not flushed (either there were no usable consoles or * another context has taken over printing or it is a panic situation and this - * is not the panic CPU or direct printing is not preferred). Regardless the - * reason, the caller should assume it is not useful to immediately try again. + * is not the panic CPU). Regardless the reason, the caller should assume it + * is not useful to immediately try again. * * Requires the console_lock. */ @@ -2863,10 +2804,6 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove *handover = false; do { - /* Let the kthread printers do the work if they can. */ - if (!allow_direct_printing()) - return false; - any_progress = false; for_each_console(con) { @@ -3081,10 +3018,6 @@ void console_start(struct console *console) console_lock(); console->flags |= CON_ENABLED; console_unlock(); - - /* Wake the newly enabled kthread printer. */ - wake_up_klogd(); - __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); @@ -3285,8 +3218,6 @@ void register_console(struct console *newcon) nr_ext_console_drivers++; newcon->dropped = 0; - newcon->thread = NULL; - if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); @@ -3296,10 +3227,6 @@ void register_console(struct console *newcon) /* Begin with next message. */ newcon->seq = prb_next_seq(prb); } - - if (printk_kthreads_available) - printk_start_kthread(newcon); - console_unlock(); console_sysfs_notify(); @@ -3326,7 +3253,6 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { - struct task_struct *thd; struct console *con; int res; @@ -3367,20 +3293,7 @@ int unregister_console(struct console *console) console_drivers->flags |= CON_CONSDEV; console->flags &= ~CON_ENABLED; - - /* - * console->thread can only be cleared under the console lock. But - * stopping the thread must be done without the console lock. The - * task that clears @thread is the task that stops the kthread. - */ - thd = console->thread; - console->thread = NULL; - console_unlock(); - - if (thd) - kthread_stop(thd); - console_sysfs_notify(); if (console->exit) @@ -3476,20 +3389,6 @@ static int __init printk_late_init(void) } late_initcall(printk_late_init); -static int __init printk_activate_kthreads(void) -{ - struct console *con; - - console_lock(); - printk_kthreads_available = true; - for_each_console(con) - printk_start_kthread(con); - console_unlock(); - - return 0; -} -early_initcall(printk_activate_kthreads); - #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) @@ -3564,180 +3463,11 @@ bool pr_flush(int timeout_ms, bool reset_on_progress) } EXPORT_SYMBOL(pr_flush); -static void __printk_fallback_preferred_direct(void) -{ - printk_prefer_direct_enter(); - pr_err("falling back to preferred direct printing\n"); - printk_kthreads_available = false; -} - -/* - * Enter preferred direct printing, but never exit. Mark console threads as - * unavailable. The system is then forever in preferred direct printing and - * any printing threads will exit. - * - * Must *not* be called under console_lock. Use - * __printk_fallback_preferred_direct() if already holding console_lock. - */ -static void printk_fallback_preferred_direct(void) -{ - console_lock(); - __printk_fallback_preferred_direct(); - console_unlock(); -} - -static bool printer_should_wake(struct console *con, u64 seq) -{ - short flags; - - if (kthread_should_stop() || !printk_kthreads_available) - return true; - - if (console_suspended) - return false; - - /* - * This is an unsafe read from con->flags, but a false positive is - * not a problem. Worst case it would allow the printer to wake up - * although it is disabled. But the printer will notice that when - * attempting to print and instead go back to sleep. - */ - flags = data_race(READ_ONCE(con->flags)); - - if (!__console_is_usable(flags)) - return false; - - return prb_read_valid(prb, seq, NULL); -} - -static int printk_kthread_func(void *data) -{ - struct console *con = data; - char *dropped_text = NULL; - char *ext_text = NULL; - bool handover; - u64 seq = 0; - char *text; - int error; - - text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); - if (!text) { - con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - - if (con->flags & CON_EXTENDED) { - ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); - if (!ext_text) { - con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - } else { - dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); - if (!dropped_text) { - con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); - printk_fallback_preferred_direct(); - goto out; - } - } - - con_printk(KERN_INFO, con, "printing thread started\n"); - - for (;;) { - /* - * Guarantee this task is visible on the waitqueue before - * checking the wake condition. - * - * The full memory barrier within set_current_state() of - * prepare_to_wait_event() pairs with the full memory barrier - * within wq_has_sleeper(). - * - * This pairs with __wake_up_klogd:A. - */ - error = wait_event_interruptible(log_wait, - printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ - - if (kthread_should_stop() || !printk_kthreads_available) - break; - - if (error) - continue; - - console_lock(); - - if (console_suspended) { - up_console_sem(); - continue; - } - - if (!console_is_usable(con)) { - __console_unlock(); - continue; - } - - /* - * Even though the printk kthread is always preemptible, it is - * still not allowed to call cond_resched() from within - * console drivers. The task may become non-preemptible in the - * console driver call chain. For example, vt_console_print() - * takes a spinlock and then can call into fbcon_redraw(), - * which can conditionally invoke cond_resched(). - */ - console_may_schedule = 0; - console_emit_next_record(con, text, ext_text, dropped_text, &handover); - if (handover) - continue; - - seq = con->seq; - - __console_unlock(); - } - - con_printk(KERN_INFO, con, "printing thread stopped\n"); -out: - kfree(dropped_text); - kfree(ext_text); - kfree(text); - - console_lock(); - /* - * If this kthread is being stopped by another task, con->thread will - * already be NULL. That is fine. The important thing is that it is - * NULL after the kthread exits. - */ - con->thread = NULL; - console_unlock(); - - return 0; -} - -/* Must be called under console_lock. */ -static void printk_start_kthread(struct console *con) -{ - /* - * Do not start a kthread if there is no write() callback. The - * kthreads assume the write() callback exists. - */ - if (!con->write) - return; - - con->thread = kthread_run(printk_kthread_func, con, - "pr/%s%d", con->name, con->index); - if (IS_ERR(con->thread)) { - con->thread = NULL; - con_printk(KERN_ERR, con, "unable to start printing thread\n"); - __printk_fallback_preferred_direct(); - return; - } -} - /* * Delayed printk version, for scheduler-internal messages: */ -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); @@ -3745,14 +3475,10 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { - printk_prefer_direct_enter(); - + if (pending & PRINTK_PENDING_OUTPUT) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); - - printk_prefer_direct_exit(); } if (pending & PRINTK_PENDING_WAKEUP) @@ -3777,11 +3503,10 @@ static void __wake_up_klogd(int val) * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * - * This pairs with devkmsg_read:A, syslog_print:A, and - * printk_kthread_func:A. + * This pairs with devkmsg_read:A and syslog_print:A. */ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ - (val & PRINTK_PENDING_DIRECT_OUTPUT)) { + (val & PRINTK_PENDING_OUTPUT)) { this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } @@ -3799,17 +3524,7 @@ void defer_console_output(void) * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ - int val = PRINTK_PENDING_WAKEUP; - - /* - * Make sure that some context will print the messages when direct - * printing is allowed. This happens in situations when the kthreads - * may not be as reliable or perhaps unusable. - */ - if (allow_direct_printing()) - val |= PRINTK_PENDING_DIRECT_OUTPUT; - - __wake_up_klogd(val); + __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); } void printk_trigger_flush(void) -- cgit v1.2.3 From 07a22b61946f0b80065b0ddcc703b715f84355f5 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:57 +0200 Subject: Revert "printk: add functions to prefer direct printing" This reverts commit 2bb2b7b57f81255c13f4395ea911d6bdc70c9fe2. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-7-pmladek@suse.com --- drivers/tty/sysrq.c | 2 -- include/linux/printk.h | 11 ----------- kernel/hung_task.c | 11 +---------- kernel/panic.c | 4 ---- kernel/printk/printk.c | 28 ---------------------------- kernel/rcu/tree_stall.h | 2 -- kernel/reboot.c | 14 +------------- kernel/watchdog.c | 4 ---- kernel/watchdog_hld.c | 4 ---- 9 files changed, 2 insertions(+), 78 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 2884cd638d64..bbfd004449b5 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -578,7 +578,6 @@ void __handle_sysrq(int key, bool check_mask) rcu_sysrq_start(); rcu_read_lock(); - printk_prefer_direct_enter(); /* * Raise the apparent loglevel to maximum so that the sysrq header * is shown to provide the user with positive feedback. We do not @@ -620,7 +619,6 @@ void __handle_sysrq(int key, bool check_mask) pr_cont("\n"); console_loglevel = orig_log_level; } - printk_prefer_direct_exit(); rcu_read_unlock(); rcu_sysrq_end(); diff --git a/include/linux/printk.h b/include/linux/printk.h index cd26aab0ab2a..091fba7283e1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -170,9 +170,6 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit -extern void printk_prefer_direct_enter(void); -extern void printk_prefer_direct_exit(void); - extern bool pr_flush(int timeout_ms, bool reset_on_progress); /* @@ -225,14 +222,6 @@ static inline void printk_deferred_exit(void) { } -static inline void printk_prefer_direct_enter(void) -{ -} - -static inline void printk_prefer_direct_exit(void) -{ -} - static inline bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 02a65d554340..52501e5f7655 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,8 +127,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * complain: */ if (sysctl_hung_task_warnings) { - printk_prefer_direct_enter(); - if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -144,8 +142,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_all_cpu_backtrace) hung_task_show_all_bt = true; - - printk_prefer_direct_exit(); } touch_nmi_watchdog(); @@ -208,17 +204,12 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); - if (hung_task_show_lock) { - printk_prefer_direct_enter(); + if (hung_task_show_lock) debug_show_all_locks(); - printk_prefer_direct_exit(); - } if (hung_task_show_all_bt) { hung_task_show_all_bt = false; - printk_prefer_direct_enter(); trigger_all_cpu_backtrace(); - printk_prefer_direct_exit(); } if (hung_task_call_panic) diff --git a/kernel/panic.c b/kernel/panic.c index 6737b2332275..8355b19676f8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -579,8 +579,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint, { disable_trace_on_warning(); - printk_prefer_direct_enter(); - if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", raw_smp_processor_id(), current->pid, file, line, @@ -610,8 +608,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint, /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); - - printk_prefer_direct_exit(); } #ifndef __WARN_FLAGS diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a6273e56b4e..b49c6ff6dca0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -362,34 +362,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; static DEFINE_MUTEX(syslog_lock); #ifdef CONFIG_PRINTK -static atomic_t printk_prefer_direct = ATOMIC_INIT(0); - -/** - * printk_prefer_direct_enter - cause printk() calls to attempt direct - * printing to all enabled consoles - * - * Since it is not possible to call into the console printing code from any - * context, there is no guarantee that direct printing will occur. - * - * This globally effects all printk() callers. - * - * Context: Any context. - */ -void printk_prefer_direct_enter(void) -{ - atomic_inc(&printk_prefer_direct); -} - -/** - * printk_prefer_direct_exit - restore printk() behavior - * - * Context: Any context. - */ -void printk_prefer_direct_exit(void) -{ - WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); -} - DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4995c078cff9..a001e1e7a992 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -647,7 +647,6 @@ static void print_cpu_stall(unsigned long gps) * See Documentation/RCU/stallwarn.rst for info on how to debug * RCU CPU stall warnings. */ - printk_prefer_direct_enter(); trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -685,7 +684,6 @@ static void print_cpu_stall(unsigned long gps) */ set_tsk_need_resched(current); set_preempt_need_resched(); - printk_prefer_direct_exit(); } static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index 4177645e74d6..6bcc5d6a6572 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -447,11 +447,9 @@ static int __orderly_reboot(void) ret = run_cmd(reboot_cmd); if (ret) { - printk_prefer_direct_enter(); pr_warn("Failed to start orderly reboot: forcing the issue\n"); emergency_sync(); kernel_restart(NULL); - printk_prefer_direct_exit(); } return ret; @@ -464,7 +462,6 @@ static int __orderly_poweroff(bool force) ret = run_cmd(poweroff_cmd); if (ret && force) { - printk_prefer_direct_enter(); pr_warn("Failed to start orderly shutdown: forcing the issue\n"); /* @@ -474,7 +471,6 @@ static int __orderly_poweroff(bool force) */ emergency_sync(); kernel_power_off(); - printk_prefer_direct_exit(); } return ret; @@ -532,8 +528,6 @@ EXPORT_SYMBOL_GPL(orderly_reboot); */ static void hw_failure_emergency_poweroff_func(struct work_struct *work) { - printk_prefer_direct_enter(); - /* * We have reached here after the emergency shutdown waiting period has * expired. This means orderly_poweroff has not been able to shut off @@ -550,8 +544,6 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) */ pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); emergency_restart(); - - printk_prefer_direct_exit(); } static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -590,13 +582,11 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) { static atomic_t allow_proceed = ATOMIC_INIT(1); - printk_prefer_direct_enter(); - pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) - goto out; + return; /* * Queue a backup emergency shutdown in the event of @@ -604,8 +594,6 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) */ hw_failure_emergency_poweroff(ms_until_forced); orderly_poweroff(true); -out: - printk_prefer_direct_exit(); } EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 40024e03d422..9166220457bc 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,8 +424,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); - printk_prefer_direct_enter(); - pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -444,8 +442,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); - - printk_prefer_direct_exit(); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 701f35f0e2d4..247bf0b1582c 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,8 +135,6 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - printk_prefer_direct_enter(); - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); @@ -157,8 +155,6 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); - printk_prefer_direct_exit(); - __this_cpu_write(hard_watchdog_warn, true); return; } -- cgit v1.2.3 From 6945b2141fc9429f30fd6df2bcdd657a7f309f2f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Jun 2022 07:15:17 -0400 Subject: MAINTAINERS: Reorganize KVM/x86 maintainership For the last few years I have been the sole maintainer of KVM, albeit getting serious help from all the people who have reviewed hundreds of patches. The volume of KVM x86 alone has gotten to the point where one maintainer is not enough; especially if that maintainer is not doing it full time and if they want to keep up with the evolution of ARM64 and RISC-V at both the architecture and the hypervisor level. So, this patch is the first step in restoring double maintainership or even transitioning to the submaintainer model of other architectures. The changes here were mostly proposed by Sean offlist and they are twofold: - revisiting the set of KVM x86 reviewers. It's important to have an an accurate list of people that are actively reviewing patches ("R"), as well as people that are able to act on bug reports ("M"). Otherwise, voids to be filled are not easily visible. The proposal is to split KVM on Hyper-V, which is where Vitaly has been the main contributor for quite some time now; likewise for KVM paravirt support, which has been the main interest of Wanpeng and to which Vitaly has also contributed (e.g., for async page faults). Jim and Joerg have not been particularly active (though Joerg has worked on guest support for AMD SEV); knowing them a bit, I can't imagine they would object to their removal or even be surprised, but please speak up if you do. - promoting Sean to maintainer for KVM x86 host support. While for now this changes little, let's treat it as a harbinger for future changes. The plan is that I would keep the final integration testing for quite some time, and probably focus more on -rc work. This will give me more time to clean up my ad hoc setup and moving towards a more public CI, with Sean focusing instead on next-release patches, and the testing up to where kvm-unit-tests and selftests pass. In order to facilitate collaboration between Sean and myself, we'll also formalize a bit more the various branches of kvm.git. Nothing is going to change with respect to handling pull requests to Linus and from other architectures, as well as maintainance of the generic code (which I expect and hope to be more important as architectures try to share more code) and documentation. However, it's not a coincidence that my entry is now the last for x86, ready to be demoted to reviewer if/when the right time comes. Suggested-by: Sean Christopherson Co-developed-by: Sean Christopherson Cc: kvm@vger.kernel.org Cc: Sean Christopherson Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Acked-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- MAINTAINERS | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 97014ae3e5ed..bb7f42229d9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10897,28 +10897,51 @@ F: tools/testing/selftests/kvm/*/s390x/ F: tools/testing/selftests/kvm/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) +M: Sean Christopherson M: Paolo Bonzini -R: Sean Christopherson -R: Vitaly Kuznetsov -R: Wanpeng Li -R: Jim Mattson -R: Joerg Roedel L: kvm@vger.kernel.org S: Supported -W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git F: arch/x86/include/asm/kvm* -F: arch/x86/include/asm/pvclock-abi.h F: arch/x86/include/asm/svm.h F: arch/x86/include/asm/vmx*.h F: arch/x86/include/uapi/asm/kvm* F: arch/x86/include/uapi/asm/svm.h F: arch/x86/include/uapi/asm/vmx.h -F: arch/x86/kernel/kvm.c -F: arch/x86/kernel/kvmclock.c F: arch/x86/kvm/ F: arch/x86/kvm/*/ +KVM PARAVIRT (KVM/paravirt) +M: Paolo Bonzini +R: Wanpeng Li +R: Vitaly Kuznetsov +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kernel/kvm.c +F: arch/x86/kernel/kvmclock.c +F: arch/x86/include/asm/pvclock-abi.h +F: include/linux/kvm_para.h +F: include/uapi/linux/kvm_para.h +F: include/uapi/asm-generic/kvm_para.h +F: include/asm-generic/kvm_para.h +F: arch/um/include/asm/kvm_para.h +F: arch/x86/include/asm/kvm_para.h +F: arch/x86/include/uapi/asm/kvm_para.h + +KVM X86 HYPER-V (KVM/hyper-v) +M: Vitaly Kuznetsov +M: Sean Christopherson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kvm/hyperv.* +F: arch/x86/kvm/kvm_onhyperv.* +F: arch/x86/kvm/svm/hyperv.* +F: arch/x86/kvm/svm/svm_onhyperv.* +F: arch/x86/kvm/vmx/evmcs.* + KERNFS M: Greg Kroah-Hartman M: Tejun Heo -- cgit v1.2.3 From 386e4fb6962b9f248a80f8870aea0870ca603e89 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 23 Jun 2022 11:06:43 -0600 Subject: io_uring: use original request task for inflight tracking In prior kernels, we did file assignment always at prep time. This meant that req->task == current. But after deferring that assignment and then pushing the inflight tracking back in, we've got the inflight tracking using current when it should in fact now be using req->task. Fixup that error introduced by adding the inflight tracking back after file assignments got modifed. Fixes: 9cae36a094e7 ("io_uring: reinstate the inflight tracking") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5c95755619e2..5ff2cdb425bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1975,7 +1975,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req) { if (!(req->flags & REQ_F_INFLIGHT)) { req->flags |= REQ_F_INFLIGHT; - atomic_inc(¤t->io_uring->inflight_tracked); + atomic_inc(&req->task->io_uring->inflight_tracked); } } -- cgit v1.2.3 From 61b6e2e5321da281ab3c0c04e1962b3d000f6248 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 Jun 2022 21:20:05 +0800 Subject: dm: fix BLK_STS_DM_REQUEUE handling when dm_io represents split bio Commit 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting") removed using cloned bio when dm io splitting is needed. Using bio_trim()+bio_inc_remaining() rather than bio_split()+bio_chain() causes multiple dm_io instances to share the same original bio, and it works fine if IOs are completed successfully. But a regression was caused for the case when BLK_STS_DM_REQUEUE is returned from any one of DM's cloned bios (whose dm_io share the same orig_bio). In this BLK_STS_DM_REQUEUE case only the mapped subset of the original bio for the current exact dm_io needs to be re-submitted. However, since the original bio is shared among all dm_io instances, the ->orig_bio actually only represents the last dm_io instance, so requeue can't work as expected. Also when more than one dm_io is requeued, the same original bio is requeued from all dm_io's completion handler, then race is caused. Fix this issue by still allocating one clone bio for completing io only, then io accounting can rely on ->orig_bio being unmodified. This is needed because the dm_io's sector_offset and sectors members are recorded relative to an unmodified ->orig_bio. In the future, we can go back to using bio_trim()+bio_inc_remaining() for dm's io splitting but then delay needing a bio clone only when handling BLK_STS_DM_REQUEUE, but that approach is a bit complicated (so it needs a development cycle): 1) bio clone needs to be done in task context 2) a block interface for unwinding bio is required Fixes: 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting") Reported-by: Benjamin Marzinski Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm.c | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 54c0473a51dd..c954ff91870e 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -272,6 +272,7 @@ struct dm_io { atomic_t io_count; struct mapped_device *md; + struct bio *split_bio; /* The three fields represent mapped part of original bio */ struct bio *orig_bio; unsigned int sector_offset; /* offset to end of orig_bio */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9ede55278eec..2b75f1ef7386 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -594,6 +594,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) atomic_set(&io->io_count, 2); this_cpu_inc(*md->pending_io); io->orig_bio = bio; + io->split_bio = NULL; io->md = md; spin_lock_init(&io->lock); io->start_time = jiffies; @@ -887,7 +888,7 @@ static void dm_io_complete(struct dm_io *io) { blk_status_t io_error; struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; + struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio; if (io->status == BLK_STS_DM_REQUEUE) { unsigned long flags; @@ -1693,9 +1694,11 @@ static void dm_split_and_process_bio(struct mapped_device *md, * Remainder must be passed to submit_bio_noacct() so it gets handled * *after* bios already submitted have been completely processed. */ - bio_trim(bio, io->sectors, ci.sector_count); - trace_block_split(bio, bio->bi_iter.bi_sector); - bio_inc_remaining(bio); + WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT)); + io->split_bio = bio_split(bio, io->sectors, GFP_NOIO, + &md->queue->bio_split); + bio_chain(io->split_bio, bio); + trace_block_split(io->split_bio, bio->bi_iter.bi_sector); submit_bio_noacct(bio); out: /* -- cgit v1.2.3 From 90736eb3232d208ee048493f371075e4272e0944 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 23 Jun 2022 14:53:25 -0400 Subject: dm mirror log: clear log bits up to BITS_PER_LONG boundary Commit 85e123c27d5c ("dm mirror log: round up region bitmap size to BITS_PER_LONG") introduced a regression on 64-bit architectures in the lvm testsuite tests: lvcreate-mirror, mirror-names and vgsplit-operation. If the device is shrunk, we need to clear log bits beyond the end of the device. The code clears bits up to a 32-bit boundary and then calculates lc->sync_count by summing set bits up to a 64-bit boundary (the commit changed that; previously, this boundary was 32-bit too). So, it was using some non-zeroed bits in the calculation and this caused misbehavior. Fix this regression by clearing bits up to BITS_PER_LONG boundary. Fixes: 85e123c27d5c ("dm mirror log: round up region bitmap size to BITS_PER_LONG") Cc: stable@vger.kernel.org Reported-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 2dda05aada23..0c6620e7b7bf 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -615,7 +615,7 @@ static int disk_resume(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) + for (i = lc->region_count; i % BITS_PER_LONG; i++) log_clear_bit(lc, lc->clean_bits, i); /* copy clean across to sync */ -- cgit v1.2.3 From c7e1c443584ddd7facffca00e9e23b0d084e5bb3 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Mon, 6 Jun 2022 13:44:24 +0900 Subject: gpio: Fix kernel-doc comments to nested union Commit 48ec13d36d3f ("gpio: Properly document parent data union") is supposed to have fixed a warning from "make htmldocs" regarding kernel-doc comments to union members. However, the same warning still remains [1]. Fix the issue by following the example found in section "Nested structs/unions" of Documentation/doc-guide/kernel-doc.rst. Signed-off-by: Akira Yokosawa Reported-by: Stephen Rothwell Fixes: 48ec13d36d3f ("gpio: Properly document parent data union") Link: https://lore.kernel.org/r/20220606093302.21febee3@canb.auug.org.au/ [1] Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Joey Gouly Cc: Marc Zyngier Tested-by: Stephen Rothwell Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b1e0f1f8ee2e..54c3c6506503 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -167,21 +167,24 @@ struct gpio_irq_chip { */ irq_flow_handler_t parent_handler; - /** - * @parent_handler_data: - * - * If @per_parent_data is false, @parent_handler_data is a single - * pointer used as the data associated with every parent interrupt. - * - * @parent_handler_data_array: - * - * If @per_parent_data is true, @parent_handler_data_array is - * an array of @num_parents pointers, and is used to associate - * different data for each parent. This cannot be NULL if - * @per_parent_data is true. - */ union { + /** + * @parent_handler_data: + * + * If @per_parent_data is false, @parent_handler_data is a + * single pointer used as the data associated with every + * parent interrupt. + */ void *parent_handler_data; + + /** + * @parent_handler_data_array: + * + * If @per_parent_data is true, @parent_handler_data_array is + * an array of @num_parents pointers, and is used to associate + * different data for each parent. This cannot be NULL if + * @per_parent_data is true. + */ void **parent_handler_data_array; }; -- cgit v1.2.3 From b0d473185ba887c798ed0cd6f5abf4075363baa4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 17 Jun 2022 12:38:09 +0200 Subject: gpio: mxs: Fix header comment This driver is about MXS GPIO support. MXC is a different platform. Signed-off-by: Stefan Wahren Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mxs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index c5166cd47c9c..7f59e5d936c2 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ // -// MXC GPIO support. (c) 2008 Daniel Mack +// MXS GPIO support. (c) 2008 Daniel Mack // Copyright 2008 Juergen Beisert, kernel@pengutronix.de // // Based on code from Freescale, -- cgit v1.2.3 From 16d584d2fc8f4ea36203af45a76becd7093586f1 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Jun 2022 12:06:21 +0800 Subject: net/dsa/hirschmann: Add missing of_node_get() in hellcreek_led_setup() of_find_node_by_name() will decrease the refcount of its first arg and we need a of_node_get() to keep refcount balance. Fixes: 7d9ee2e8ff15 ("net: dsa: hellcreek: Add PTP status LEDs") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220622040621.4094304-1-windhl@126.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index 2572c6087bb5..b28baab6d56a 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -300,6 +300,7 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) const char *label, *state; int ret = -EINVAL; + of_node_get(hellcreek->dev->of_node); leds = of_find_node_by_name(hellcreek->dev->of_node, "leds"); if (!leds) { dev_err(hellcreek->dev, "No LEDs specified in device tree!\n"); -- cgit v1.2.3 From 7c97bc0128b2eecc703106112679a69d446d1a12 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 22 Jun 2022 20:02:04 -0700 Subject: net: dsa: bcm_sf2: force pause link settings The pause settings reported by the PHY should also be applied to the GMII port status override otherwise the switch will not generate pause frames towards the link partner despite the advertisement saying otherwise. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220623030204.1966851-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 87e81c636339..be0edfa093d0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -878,6 +878,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, if (duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (tx_pause) + reg |= TXFLOW_CNTL; + if (rx_pause) + reg |= RXFLOW_CNTL; + core_writel(priv, reg, offset); } -- cgit v1.2.3 From ad887a507d7386de09a532c5d303ed659eba2cfb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 22 Jun 2022 13:54:16 +0200 Subject: net/ncsi: use proper "mellanox" DT vendor prefix "mlx" Devicetree vendor prefix is not documented and instead "mellanox" should be used. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220622115416.7400-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-manage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 78814417d753..80713febfac6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,7 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && of_get_property(np, "mlx,multi-host", NULL)) + if (np && (of_get_property(np, "mellanox,multi-host", NULL) || + of_get_property(np, "mlx,multi-host", NULL))) ndp->mlx_multi_host = true; } -- cgit v1.2.3 From 1228b34c8d0ecf6de18c4c95d22f60cc8607c50a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jun 2022 15:02:20 +0000 Subject: net: clear msg_get_inq in __sys_recvfrom() and __copy_msghdr_from_user() syzbot reported uninit-value in tcp_recvmsg() [1] Issue here is that msg->msg_get_inq should have been cleared, otherwise tcp_recvmsg() might read garbage and perform more work than needed, or have undefined behavior. Given CONFIG_INIT_STACK_ALL_ZERO=y is probably going to be the default soon, I chose to change __sys_recvfrom() to clear all fields but msghdr.addr which might be not NULL. For __copy_msghdr_from_user(), I added an explicit clear of kmsg->msg_get_inq. [1] BUG: KMSAN: uninit-value in tcp_recvmsg+0x6cf/0xb60 net/ipv4/tcp.c:2557 tcp_recvmsg+0x6cf/0xb60 net/ipv4/tcp.c:2557 inet_recvmsg+0x13a/0x5a0 net/ipv4/af_inet.c:850 sock_recvmsg_nosec net/socket.c:995 [inline] sock_recvmsg net/socket.c:1013 [inline] __sys_recvfrom+0x696/0x900 net/socket.c:2176 __do_sys_recvfrom net/socket.c:2194 [inline] __se_sys_recvfrom net/socket.c:2190 [inline] __x64_sys_recvfrom+0x122/0x1c0 net/socket.c:2190 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Local variable msg created at: __sys_recvfrom+0x81/0x900 net/socket.c:2154 __do_sys_recvfrom net/socket.c:2194 [inline] __se_sys_recvfrom net/socket.c:2190 [inline] __x64_sys_recvfrom+0x122/0x1c0 net/socket.c:2190 CPU: 0 PID: 3493 Comm: syz-executor170 Not tainted 5.19.0-rc3-syzkaller-30868-g4b28366af7d9 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: f94fd25cb0aa ("tcp: pass back data left in socket after receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jens Axboe Tested-by: Alexander Potapenko Link: https://lore.kernel.org/r/20220622150220.1091182-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/socket.c b/net/socket.c index 2bc8773d9dc5..96300cdc0625 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2149,10 +2149,13 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len) { + struct sockaddr_storage address; + struct msghdr msg = { + /* Save some cycles and don't copy the address if not needed */ + .msg_name = addr ? (struct sockaddr *)&address : NULL, + }; struct socket *sock; struct iovec iov; - struct msghdr msg; - struct sockaddr_storage address; int err, err2; int fput_needed; @@ -2163,14 +2166,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, if (!sock) goto out; - msg.msg_control = NULL; - msg.msg_controllen = 0; - /* Save some cycles and don't copy the address if not needed */ - msg.msg_name = addr ? (struct sockaddr *)&address : NULL; - /* We assume all kernel code knows the size of sockaddr_storage */ - msg.msg_namelen = 0; - msg.msg_iocb = NULL; - msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2375,6 +2370,7 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, return -EFAULT; kmsg->msg_control_is_user = true; + kmsg->msg_get_inq = 0; kmsg->msg_control_user = msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; -- cgit v1.2.3 From b968080808f7f28b89aa495b7402ba48eb17ee93 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Wed, 22 Jun 2022 17:02:34 -0700 Subject: selftests/net: pass ipv6_args to udpgso_bench's IPv6 TCP test udpgso_bench.sh has been running its IPv6 TCP test with IPv4 arguments since its initial conmit. Looks like a typo. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Cc: willemb@google.com Signed-off-by: Dimitris Michailidis Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220623000234.61774-1-dmichail@fungible.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..dc932fd65363 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } -- cgit v1.2.3 From 745bbc0995c25917dfafb645b8efb29813ef9e0b Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Tue, 14 Jun 2022 08:25:50 +0900 Subject: ksmbd: remove duplicate flag set in smb2_write The writethrough flag is set again if is_rdma_channel is false. Signed-off-by: Hyunchul Lee Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e6f4ccc12f49..e35930867893 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6490,6 +6490,7 @@ int smb2_write(struct ksmbd_work *work) goto out; } + ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) writethrough = true; @@ -6505,10 +6506,6 @@ int smb2_write(struct ksmbd_work *work) data_buf = (char *)(((char *)&req->hdr.ProtocolId) + le16_to_cpu(req->DataOffset)); - ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); - if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) - writethrough = true; - ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n", fp->filp->f_path.dentry, offset, length); err = ksmbd_vfs_write(work, fp, data_buf, length, &offset, -- cgit v1.2.3 From 18e39fb960e6a908ac5230b57e3d0d6c25232368 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 19 Jun 2022 22:35:48 +0900 Subject: ksmbd: set the range of bytes to zero without extending file size in FSCTL_ZERO_DATA generic/091, 263 test failed since commit f66f8b94e7f2 ("cifs: when extending a file with falloc we should make files not-sparse"). FSCTL_ZERO_DATA sets the range of bytes to zero without extending file size. The VFS_FALLOCATE_FL_KEEP_SIZE flag should be used even on non-sparse files. Cc: stable@vger.kernel.org Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/vfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index dcdd07c6efff..f194bf764f9f 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1015,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); - return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); + return vfs_fallocate(fp->filp, + FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, + off, len); } int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, -- cgit v1.2.3 From b5e5f9dfc915ff05b41dff56181e1dae101712bd Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 19 Jun 2022 22:37:17 +0900 Subject: ksmbd: check invalid FileOffset and BeyondFinalZero in FSCTL_ZERO_DATA FileOffset should not be greater than BeyondFinalZero in FSCTL_ZERO_DATA. And don't call ksmbd_vfs_zero_data() if length is zero. Cc: stable@vger.kernel.org Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e35930867893..94ab1dcd80e7 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7700,7 +7700,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct file_zero_data_information *zero_data; struct ksmbd_file *fp; - loff_t off, len; + loff_t off, len, bfz; if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, @@ -7717,19 +7717,26 @@ int smb2_ioctl(struct ksmbd_work *work) zero_data = (struct file_zero_data_information *)&req->Buffer[0]; - fp = ksmbd_lookup_fd_fast(work, id); - if (!fp) { - ret = -ENOENT; + off = le64_to_cpu(zero_data->FileOffset); + bfz = le64_to_cpu(zero_data->BeyondFinalZero); + if (off > bfz) { + ret = -EINVAL; goto out; } - off = le64_to_cpu(zero_data->FileOffset); - len = le64_to_cpu(zero_data->BeyondFinalZero) - off; + len = bfz - off; + if (len) { + fp = ksmbd_lookup_fd_fast(work, id); + if (!fp) { + ret = -ENOENT; + goto out; + } - ret = ksmbd_vfs_zero_data(work, fp, off, len); - ksmbd_fd_put(work, fp); - if (ret < 0) - goto out; + ret = ksmbd_vfs_zero_data(work, fp, off, len); + ksmbd_fd_put(work, fp); + if (ret < 0) + goto out; + } break; } case FSCTL_QUERY_ALLOCATED_RANGES: -- cgit v1.2.3 From 40f2f3e94178d45e4ee6078effba2dfc76f6f5ba Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 13 Jun 2022 10:59:57 +0300 Subject: vdpa/mlx5: Update Control VQ callback information The control VQ specific information is stored in the dedicated struct mlx5_control_vq. When the callback is updated through mlx5_vdpa_set_vq_cb(), make sure to update the control VQ struct. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Eli Cohen Message-Id: <20220613075958.511064-1-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang event_cbs[idx] = *cb; + if (is_ctrl_vq_idx(mvdev, idx)) + mvdev->cvq.event_cb = *cb; } static void mlx5_cvq_notify(struct vringh *vring) -- cgit v1.2.3 From ace9252446ec615cd79a5f77d90edb25c0b9d024 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 13 Jun 2022 10:59:58 +0300 Subject: vdpa/mlx5: Initialize CVQ vringh only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, CVQ vringh is initialized inside setup_virtqueues() which is called every time a memory update is done. This is undesirable since it resets all the context of the vring, including the available and used indices. Move the initialization to mlx5_vdpa_set_status() when VIRTIO_CONFIG_S_DRIVER_OK is set. Signed-off-by: Eli Cohen Message-Id: <20220613075958.511064-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 789c078ff1af..e85c1d71f4ed 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2176,7 +2176,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i; @@ -2186,16 +2185,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) goto err_vq; } - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { - err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, - (struct vring_desc *)(uintptr_t)cvq->desc_addr, - (struct vring_avail *)(uintptr_t)cvq->driver_addr, - (struct vring_used *)(uintptr_t)cvq->device_addr); - if (err) - goto err_vq; - } - return 0; err_vq: @@ -2468,6 +2457,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ndev->mvdev.cvq.ready = false; } +static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + + return err; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2480,6 +2484,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_cvq_vring(mvdev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); + goto err_setup; + } err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); -- cgit v1.2.3 From 0e0348ac3f0a6e6606f1aa5acb1803ada913aa3d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 13 Jun 2022 22:52:23 +0300 Subject: vduse: Tie vduse mgmtdev and its device vduse devices are not backed by any real devices such as PCI. Hence it doesn't have any parent device linked to it. Kernel driver model in [1] suggests to avoid an empty device release callback. Hence tie the mgmtdevice object's life cycle to an allocate dummy struct device instead of static one. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/kobject.rst?h=v5.18-rc7#n284 Signed-off-by: Parav Pandit Message-Id: <20220613195223.473966-1-parav@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xie Yongji Acked-by: Jason Wang --- drivers/vdpa/vdpa_user/vduse_dev.c | 60 +++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 776ad7496f53..3bc27de58f46 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1476,16 +1476,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } -static void vduse_mgmtdev_release(struct device *dev) -{ -} - -static struct device vduse_mgmtdev = { - .init_name = "vduse", - .release = vduse_mgmtdev_release, +struct vduse_mgmt_dev { + struct vdpa_mgmt_dev mgmt_dev; + struct device dev; }; -static struct vdpa_mgmt_dev mgmt_dev; +static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { @@ -1510,7 +1506,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; - vdev->vdpa.mdev = &mgmt_dev; + vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; } @@ -1556,34 +1552,52 @@ static struct virtio_device_id id_table[] = { { 0 }, }; -static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vduse_mgmtdev, - .id_table = id_table, - .ops = &vdpa_dev_mgmtdev_ops, -}; +static void vduse_mgmtdev_release(struct device *dev) +{ + struct vduse_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); + kfree(mgmt_dev); +} static int vduse_mgmtdev_init(void) { int ret; - ret = device_register(&vduse_mgmtdev); - if (ret) + vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); + if (!vduse_mgmt) + return -ENOMEM; + + ret = dev_set_name(&vduse_mgmt->dev, "vduse"); + if (ret) { + kfree(vduse_mgmt); return ret; + } - ret = vdpa_mgmtdev_register(&mgmt_dev); + vduse_mgmt->dev.release = vduse_mgmtdev_release; + + ret = device_register(&vduse_mgmt->dev); if (ret) - goto err; + goto dev_reg_err; - return 0; -err: - device_unregister(&vduse_mgmtdev); + vduse_mgmt->mgmt_dev.id_table = id_table; + vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; + vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; + ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); + if (ret) + device_unregister(&vduse_mgmt->dev); + + return ret; + +dev_reg_err: + put_device(&vduse_mgmt->dev); return ret; } static void vduse_mgmtdev_exit(void) { - vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vduse_mgmtdev); + vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); + device_unregister(&vduse_mgmt->dev); } static int vduse_init(void) -- cgit v1.2.3 From a7722890fdfb2aaeb6b02d68bb0f1e411e58d539 Mon Sep 17 00:00:00 2001 From: "huangjie.albert" Date: Fri, 17 Jun 2022 10:04:11 +0800 Subject: virtio_ring : keep used_wrap_counter in vq->last_used_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the used_wrap_counter and the vq->last_used_idx may get out of sync if they are separate assignment,and interrupt might use an incorrect value to check for the used index. for example:OOB access ksoftirqd may consume the packet and it will call: virtnet_poll -->virtnet_receive -->virtqueue_get_buf_ctx -->virtqueue_get_buf_ctx_packed and in virtqueue_get_buf_ctx_packed: vq->last_used_idx += vq->packed.desc_state[id].num; if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { vq->last_used_idx -= vq->packed.vring.num; vq->packed.used_wrap_counter ^= 1; } if at the same time, there comes a vring interrupt,in vring_interrupt: we will call: vring_interrupt -->more_used -->more_used_packed -->is_used_desc_packed in is_used_desc_packed, the last_used_idx maybe >= vq->packed.vring.num. so this could case a memory out of bounds bug. this patch is to keep the used_wrap_counter in vq->last_used_idx so we can get the correct value to check for used index in interrupt. v3->v4: - use READ_ONCE/WRITE_ONCE to get/set vq->last_used_idx v2->v3: - add inline function to get used_wrap_counter and last_used - when use vq->last_used_idx, only read once if vq->last_used_idx is read twice, the values can be inconsistent. - use last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)) to get the all bits below VRING_PACKED_EVENT_F_WRAP_CTR v1->v2: - reuse the VRING_PACKED_EVENT_F_WRAP_CTR - Remove parameter judgment in is_used_desc_packed, because it can't be illegal Signed-off-by: huangjie.albert Message-Id: <20220617020411.80367-1-huangjie.albert@bytedance.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 75 +++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 13a7348cedff..ef30465dd39c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -111,7 +111,12 @@ struct vring_virtqueue { /* Number we've added since last sync. */ unsigned int num_added; - /* Last used index we've seen. */ + /* Last used index we've seen. + * for split ring, it just contains last used index + * for packed ring: + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. + */ u16 last_used_idx; /* Hint for event idx: already triggered no need to disable. */ @@ -154,9 +159,6 @@ struct vring_virtqueue { /* Driver ring wrap counter. */ bool avail_wrap_counter; - /* Device ring wrap counter. */ - bool used_wrap_counter; - /* Avail used flags. */ u16 avail_used_flags; @@ -973,6 +975,15 @@ static struct virtqueue *vring_create_virtqueue_split( /* * Packed ring specific functions - *_packed(). */ +static inline bool packed_used_wrap_counter(u16 last_used_idx) +{ + return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); +} + +static inline u16 packed_last_used(u16 last_used_idx) +{ + return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); +} static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, struct vring_desc_extra *extra) @@ -1406,8 +1417,14 @@ static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, static inline bool more_used_packed(const struct vring_virtqueue *vq) { - return is_used_desc_packed(vq, vq->last_used_idx, - vq->packed.used_wrap_counter); + u16 last_used; + u16 last_used_idx; + bool used_wrap_counter; + + last_used_idx = READ_ONCE(vq->last_used_idx); + last_used = packed_last_used(last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + return is_used_desc_packed(vq, last_used, used_wrap_counter); } static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, @@ -1415,7 +1432,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 last_used, id; + u16 last_used, id, last_used_idx; + bool used_wrap_counter; void *ret; START_USE(vq); @@ -1434,7 +1452,9 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, /* Only get used elements after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); - last_used = vq->last_used_idx; + last_used_idx = READ_ONCE(vq->last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + last_used = packed_last_used(last_used_idx); id = le16_to_cpu(vq->packed.vring.desc[last_used].id); *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); @@ -1451,12 +1471,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, ret = vq->packed.desc_state[id].data; detach_buf_packed(vq, id, ctx); - vq->last_used_idx += vq->packed.desc_state[id].num; - if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { - vq->last_used_idx -= vq->packed.vring.num; - vq->packed.used_wrap_counter ^= 1; + last_used += vq->packed.desc_state[id].num; + if (unlikely(last_used >= vq->packed.vring.num)) { + last_used -= vq->packed.vring.num; + used_wrap_counter ^= 1; } + last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); + WRITE_ONCE(vq->last_used_idx, last_used); + /* * If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -1465,9 +1488,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) virtio_store_mb(vq->weak_barriers, &vq->packed.vring.driver->off_wrap, - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR))); + cpu_to_le16(vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); @@ -1499,9 +1520,7 @@ static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) if (vq->event) { vq->packed.vring.driver->off_wrap = - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR)); + cpu_to_le16(vq->last_used_idx); /* * We need to update event offset and event wrap * counter first before updating event flags. @@ -1518,8 +1537,7 @@ static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) } END_USE(vq); - return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR); + return vq->last_used_idx; } static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) @@ -1537,7 +1555,7 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 used_idx, wrap_counter; + u16 used_idx, wrap_counter, last_used_idx; u16 bufs; START_USE(vq); @@ -1550,9 +1568,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) if (vq->event) { /* TODO: tune this threshold */ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; - wrap_counter = vq->packed.used_wrap_counter; + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); - used_idx = vq->last_used_idx + bufs; + used_idx = packed_last_used(last_used_idx) + bufs; if (used_idx >= vq->packed.vring.num) { used_idx -= vq->packed.vring.num; wrap_counter ^= 1; @@ -1582,9 +1601,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) */ virtio_mb(vq->weak_barriers); - if (is_used_desc_packed(vq, - vq->last_used_idx, - vq->packed.used_wrap_counter)) { + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); + used_idx = packed_last_used(last_used_idx); + if (is_used_desc_packed(vq, used_idx, wrap_counter)) { END_USE(vq); return false; } @@ -1689,7 +1709,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->notify = notify; vq->weak_barriers = weak_barriers; vq->broken = true; - vq->last_used_idx = 0; + vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); vq->event_triggered = false; vq->num_added = 0; vq->packed_ring = true; @@ -1720,7 +1740,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->packed.next_avail_idx = 0; vq->packed.avail_wrap_counter = 1; - vq->packed.used_wrap_counter = 1; vq->packed.event_flags_shadow = 0; vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; -- cgit v1.2.3 From 03d9571706942fa653a4975709820596d13563c7 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Fri, 17 Jun 2022 01:59:52 -0400 Subject: virtio: Remove unnecessary variable assignments In function vp_modern_probe(), "pci_dev" is initialized with the value of "mdev->pci_dev", so assigning "pci_dev" to "mdev->pci_dev" is unnecessary since they store the same value. Signed-off-by: Bo Liu Message-Id: <20220617055952.5364-1-liubo03@inspur.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio_pci_modern_dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index b790f30b2b56..fa2a9445bb18 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -220,8 +220,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) check_offsets(); - mdev->pci_dev = pci_dev; - /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */ if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f) return -ENODEV; -- cgit v1.2.3 From c346dae4f3fbce51bbd4f2ec5e8c6f9b91e93163 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 22 Jun 2022 09:29:40 +0800 Subject: virtio: disable notification hardening by default We try to harden virtio device notifications in 8b4ec69d7e09 ("virtio: harden vring IRQ"). It works with the assumption that the driver or core can properly call virtio_device_ready() at the right place. Unfortunately, this seems to be not true and uncover various bugs of the existing drivers, mainly the issue of using virtio_device_ready() incorrectly. So let's add a Kconfig option and disable it by default. It gives us time to fix the drivers and then we can consider re-enabling it. Signed-off-by: Jason Wang Message-Id: <20220622012940.21441-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- drivers/s390/virtio/virtio_ccw.c | 9 ++++++++- drivers/virtio/Kconfig | 13 +++++++++++++ drivers/virtio/virtio.c | 2 ++ drivers/virtio/virtio_ring.c | 12 ++++++++++++ include/linux/virtio_config.h | 2 ++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 97e51c34e6cf..161d3b141f0d 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1136,8 +1136,13 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vcdev->err = -EIO; } virtio_ccw_check_activity(vcdev, activity); - /* Interrupts are disabled here */ +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + /* + * Paired with virtio_ccw_synchronize_cbs() and interrupts are + * disabled here. + */ read_lock(&vcdev->irq_lock); +#endif for_each_set_bit(i, indicators(vcdev), sizeof(*indicators(vcdev)) * BITS_PER_BYTE) { /* The bit clear must happen before the vring kick. */ @@ -1146,7 +1151,9 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vq = virtio_ccw_vq_by_ind(vcdev, i); vring_interrupt(0, vq); } +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION read_unlock(&vcdev->irq_lock); +#endif if (test_bit(0, indicators2(vcdev))) { virtio_config_changed(&vcdev->vdev); clear_bit(0, indicators2(vcdev)); diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index a6dc8b5846fe..e1556d2a355a 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -29,6 +29,19 @@ menuconfig VIRTIO_MENU if VIRTIO_MENU +config VIRTIO_HARDEN_NOTIFICATION + bool "Harden virtio notification" + help + Enable this to harden the device notifications and suppress + those that happen at a time where notifications are illegal. + + Experimental: Note that several drivers still have bugs that + may cause crashes or hangs when correct handling of + notifications is enforced; depending on the subset of + drivers and devices you use, this may or may not work. + + If unsure, say N. + config VIRTIO_PCI tristate "PCI driver for virtio devices" depends on PCI diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 6bace84ae37e..7deeed30d1f3 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -219,6 +219,7 @@ static int virtio_features_ok(struct virtio_device *dev) * */ void virtio_reset_device(struct virtio_device *dev) { +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION /* * The below virtio_synchronize_cbs() guarantees that any * interrupt for this line arriving after @@ -227,6 +228,7 @@ void virtio_reset_device(struct virtio_device *dev) */ virtio_break_device(dev); virtio_synchronize_cbs(dev); +#endif dev->config->reset(dev); } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ef30465dd39c..96731cdef422 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1708,7 +1708,11 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; +#else + vq->broken = false; +#endif vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); vq->event_triggered = false; vq->num_added = 0; @@ -2154,9 +2158,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } if (unlikely(vq->broken)) { +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION dev_warn_once(&vq->vq.vdev->dev, "virtio vring IRQ raised before DRIVER_OK"); return IRQ_NONE; +#else + return IRQ_HANDLED; +#endif } /* Just a hint for performance: so it's ok that this can be racy! */ @@ -2199,7 +2207,11 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; +#else + vq->broken = false; +#endif vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 49c7c32815f1..b47c2e7ed0ee 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -257,6 +257,7 @@ void virtio_device_ready(struct virtio_device *dev) WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION /* * The virtio_synchronize_cbs() makes sure vring_interrupt() * will see the driver specific setup if it sees vq->broken @@ -264,6 +265,7 @@ void virtio_device_ready(struct virtio_device *dev) */ virtio_synchronize_cbs(dev); __virtio_unbreak_device(dev); +#endif /* * The transport should ensure the visibility of vq->broken * before setting DRIVER_OK. See the comments for the transport -- cgit v1.2.3 From ebdec859faa8cfbfef9f6c1f83d79dd6c8f4ab8c Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 23 Jun 2022 17:18:58 +0000 Subject: KVM: x86/svm: add __GFP_ACCOUNT to __sev_dbg_{en,de}crypt_user() Adding the accounting flag when allocating pages within the SEV function, since these memory pages should belong to individual VM. No functional change intended. Signed-off-by: Mingwei Zhang Message-Id: <20220623171858.2083637-1-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 51fd985cf21d..e2c7fada0b3d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, /* If source buffer is not aligned then use an intermediate buffer */ if (!IS_ALIGNED((unsigned long)vaddr, 16)) { - src_tpage = alloc_page(GFP_KERNEL); + src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!src_tpage) return -ENOMEM; @@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; - dst_tpage = alloc_page(GFP_KERNEL); + dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!dst_tpage) { ret = -ENOMEM; goto e_free; -- cgit v1.2.3 From 6defa24d3b12bbd418bc8526dea1cbc605265c06 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 23 Jun 2022 10:34:06 -0700 Subject: KVM: SEV: Init target VMCBs in sev_migrate_from The target VMCBs during an intra-host migration need to correctly setup for running SEV and SEV-ES guests. Add sev_init_vmcb() function and make sev_es_init_vmcb() static. sev_init_vmcb() uses the now private function to init SEV-ES guests VMCBs when needed. Fixes: 0b020f5af092 ("KVM: SEV: Add support for SEV-ES intra host migration") Fixes: b56639318bb2 ("KVM: SEV: Add support for SEV intra host migration") Signed-off-by: Peter Gonda Cc: Marc Orr Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Tom Lendacky Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <20220623173406.744645-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 68 +++++++++++++++++++++++++++++++++----------------- arch/x86/kvm/svm/svm.c | 11 ++------ arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 48 insertions(+), 33 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e2c7fada0b3d..0c240ed04f96 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1665,19 +1665,24 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; + struct kvm_vcpu *dst_vcpu, *src_vcpu; + struct vcpu_svm *dst_svm, *src_svm; struct kvm_sev_info *mirror; + unsigned long i; dst->active = true; dst->asid = src->asid; dst->handle = src->handle; dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; + dst->es_active = src->es_active; src->asid = 0; src->active = false; src->handle = 0; src->pages_locked = 0; src->enc_context_owner = NULL; + src->es_active = false; list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); @@ -1704,26 +1709,21 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) list_del(&src->mirror_entry); list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); } -} -static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) -{ - unsigned long i; - struct kvm_vcpu *dst_vcpu, *src_vcpu; - struct vcpu_svm *dst_svm, *src_svm; + kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { + dst_svm = to_svm(dst_vcpu); - if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) - return -EINVAL; + sev_init_vmcb(dst_svm); - kvm_for_each_vcpu(i, src_vcpu, src) { - if (!src_vcpu->arch.guest_state_protected) - return -EINVAL; - } + if (!dst->es_active) + continue; - kvm_for_each_vcpu(i, src_vcpu, src) { + /* + * Note, the source is not required to have the same number of + * vCPUs as the destination when migrating a vanilla SEV VM. + */ + src_vcpu = kvm_get_vcpu(dst_kvm, i); src_svm = to_svm(src_vcpu); - dst_vcpu = kvm_get_vcpu(dst, i); - dst_svm = to_svm(dst_vcpu); /* * Transfer VMSA and GHCB state to the destination. Nullify and @@ -1740,8 +1740,23 @@ static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; src_vcpu->arch.guest_state_protected = false; } - to_kvm_svm(src)->sev_info.es_active = false; - to_kvm_svm(dst)->sev_info.es_active = true; +} + +static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) +{ + struct kvm_vcpu *src_vcpu; + unsigned long i; + + if (!sev_es_guest(src)) + return 0; + + if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) + return -EINVAL; + + kvm_for_each_vcpu(i, src_vcpu, src) { + if (!src_vcpu->arch.guest_state_protected) + return -EINVAL; + } return 0; } @@ -1789,11 +1804,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_dst_vcpu; - if (sev_es_guest(source_kvm)) { - ret = sev_es_migrate_from(kvm, source_kvm); - if (ret) - goto out_source_vcpu; - } + ret = sev_check_source_vcpus(kvm, source_kvm); + if (ret) + goto out_source_vcpu; sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); @@ -2914,7 +2927,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } -void sev_es_init_vmcb(struct vcpu_svm *svm) +static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; @@ -2967,6 +2980,15 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) } } +void sev_init_vmcb(struct vcpu_svm *svm) +{ + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; + clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_init_vmcb(svm); +} + void sev_es_vcpu_reset(struct vcpu_svm *svm) { /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 87da90360bc7..44bbf25dfeb9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1212,15 +1212,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } - if (sev_guest(vcpu->kvm)) { - svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; - clr_exception_intercept(svm, UD_VECTOR); - - if (sev_es_guest(vcpu->kvm)) { - /* Perform SEV-ES specific VMCB updates */ - sev_es_init_vmcb(svm); - } - } + if (sev_guest(vcpu->kvm)) + sev_init_vmcb(svm); svm_hv_init_vmcb(vmcb); init_vmcb_after_set_cpuid(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1bddd336a27e..9223ac100ef5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -649,10 +649,10 @@ void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); +void sev_init_vmcb(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); -- cgit v1.2.3 From 87d044096ea62f1f230e8c4679ee8abf03266f64 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 13 Jun 2022 14:16:27 -0500 Subject: crypto: ccp - Fix device IRQ counting by using platform_irq_count() The ccp driver loops through the platform device resources array to get the IRQ count for the device. With commit a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core"), the IRQ resources are no longer stored in the platform device resource array. As a result, the IRQ count is now always zero. This causes the driver to issue a second call to platform_get_irq(), which fails if the IRQ count is really 1, causing the loading of the driver to fail. Replace looping through the resources array to count the number of IRQs with a call to platform_irq_count(). Fixes: a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core") Signed-off-by: Tom Lendacky Reviewed-by: Rob Herring Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-platform.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 9dba52fbee99..7d79a8744f9a 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -85,17 +85,9 @@ static int sp_get_irqs(struct sp_device *sp) struct sp_platform *sp_platform = sp->dev_specific; struct device *dev = sp->dev; struct platform_device *pdev = to_platform_device(dev); - unsigned int i, count; int ret; - for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if (resource_type(res) == IORESOURCE_IRQ) - count++; - } - - sp_platform->irq_count = count; + sp_platform->irq_count = platform_irq_count(pdev); ret = platform_get_irq(pdev, 0); if (ret < 0) { @@ -104,7 +96,7 @@ static int sp_get_irqs(struct sp_device *sp) } sp->psp_irq = ret; - if (count == 1) { + if (sp_platform->irq_count == 1) { sp->ccp_irq = ret; } else { ret = platform_get_irq(pdev, 1); -- cgit v1.2.3 From 90bc2af24638659da56397ff835f3c95a948f991 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Jun 2022 10:46:31 -0400 Subject: USB: gadget: Fix double-free bug in raw_gadget driver Re-reading a recently merged fix to the raw_gadget driver showed that it inadvertently introduced a double-free bug in a failure pathway. If raw_ioctl_init() encounters an error after the driver ID number has been allocated, it deallocates the ID number before returning. But when dev_free() runs later on, it will then try to deallocate the ID number a second time. Closely related to this issue is another error in the recent fix: The ID number is stored in the raw_dev structure before the code checks to see whether the structure has already been initialized, in which case the new ID number would overwrite the earlier value. The solution to both bugs is to keep the new ID number in a local variable, and store it in the raw_dev structure only after the check for prior initialization. No errors can occur after that point, so the double-free will never happen. Fixes: f2d8c2606825 ("usb: gadget: Fix non-unique driver names in raw-gadget driver") CC: Andrey Konovalov CC: Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YrMrRw5AyIZghN0v@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 5c8481cef35f..2acece16b890 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -430,6 +430,7 @@ out_put: static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; + int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; @@ -452,10 +453,9 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } - ret = ida_alloc(&driver_id_numbers, GFP_KERNEL); - if (ret < 0) - return ret; - dev->driver_id_number = ret; + driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (driver_id_number < 0) + return driver_id_number; driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); if (!driver_driver_name) { @@ -463,7 +463,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) goto out_free_driver_id_number; } snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, - DRIVER_NAME ".%d", dev->driver_id_number); + DRIVER_NAME ".%d", driver_id_number); udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_driver_name) { @@ -507,6 +507,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; + dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; spin_unlock_irqrestore(&dev->lock, flags); @@ -521,7 +522,7 @@ out_free_udc_driver_name: out_free_driver_driver_name: kfree(driver_driver_name); out_free_driver_id_number: - ida_free(&driver_id_numbers, dev->driver_id_number); + ida_free(&driver_id_numbers, driver_id_number); return ret; } -- cgit v1.2.3 From b24346a240b36cfc4df194d145463874985aa29b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 23 Jun 2022 11:02:42 +0800 Subject: usb: chipidea: udc: check request status before setting device address The complete() function may be called even though request is not completed. In this case, it's necessary to check request status so as not to set device address wrongly. Fixes: 10775eb17bee ("usb: chipidea: udc: update gadget states according to ch9") cc: Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20220623030242.41796-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index dc6c96e04bcf..3b8bf6daf7d0 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1048,6 +1048,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req) struct ci_hdrc *ci = req->context; unsigned long flags; + if (req->status < 0) + return; + if (ci->setaddr) { hw_usb_set_address(ci, ci->address); ci->setaddr = false; -- cgit v1.2.3 From c242507c1b895646b4a25060df13b6214805759f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 24 Jun 2022 14:51:39 +0200 Subject: MAINTAINERS: Add new IOMMU development mailing list The IOMMU mailing list will move from lists.linux-foundation.org to lists.linux.dev. The hard switch of the archive will happen on July 5th, but add the new list now already so that people start using the list when sending patches. After July 5th the old list will disappear. Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20220624125139.412-1-joro@8bytes.org Signed-off-by: Joerg Roedel --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..36d1bc999815 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -427,6 +427,7 @@ ACPI VIOT DRIVER M: Jean-Philippe Brucker L: linux-acpi@vger.kernel.org L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/acpi/viot.c F: include/linux/acpi_viot.h @@ -960,6 +961,7 @@ AMD IOMMU (AMD-VI) M: Joerg Roedel R: Suravee Suthikulpanit L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/amd/ @@ -5962,6 +5964,7 @@ M: Christoph Hellwig M: Marek Szyprowski R: Robin Murphy L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -5974,6 +5977,7 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Xiang Chen L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ @@ -7558,6 +7562,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained F: drivers/iommu/exynos-iommu.c @@ -9977,6 +9982,7 @@ INTEL IOMMU (VT-d) M: David Woodhouse M: Lu Baolu L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ @@ -10356,6 +10362,7 @@ IOMMU DRIVERS M: Joerg Roedel M: Will Deacon L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ @@ -12504,6 +12511,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* @@ -16545,6 +16553,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -19170,6 +19179,7 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -21844,6 +21854,7 @@ M: Juergen Gross M: Stefano Stabellini L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: iommu@lists.linux-foundation.org +L: iommu@lists.linux.dev S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* -- cgit v1.2.3 From 8da33fd11c05b7c64ef6456970f2fce61851806e Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 24 Jun 2022 09:43:59 +0000 Subject: cifs: avoid deadlocks while updating iface We use cifs_tcp_ses_lock to protect a lot of things. Not only does it protect the lists of connections, sessions, tree connects, open file lists, etc., we also use it to protect some fields in each of it's entries. In this case, cifs_mark_ses_for_reconnect takes the cifs_tcp_ses_lock to traverse the lists, and then calls cifs_update_iface. However, that can end up calling cifs_put_tcp_session, which picks up the same lock again. Avoid this by taking a ref for the session, drop the lock, and then call update iface. Also, in cifs_update_iface, avoid nested locking of iface_lock and chan_lock, as much as possible. When unavoidable, we need to pick iface_lock first. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 15 ++++++++++++--- fs/cifs/sess.c | 33 +++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e666d2643ede..8d56325915d0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -236,7 +236,7 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { struct TCP_Server_Info *pserver; - struct cifs_ses *ses; + struct cifs_ses *ses, *nses; struct cifs_tcon *tcon; /* @@ -250,10 +250,19 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { /* check if iface is still active */ - if (!cifs_chan_is_iface_active(ses, server)) + if (!cifs_chan_is_iface_active(ses, server)) { + /* + * HACK: drop the lock before calling + * cifs_chan_update_iface to avoid deadlock + */ + ses->ses_count++; + spin_unlock(&cifs_tcp_ses_lock); cifs_chan_update_iface(ses, server); + spin_lock(&cifs_tcp_ses_lock); + ses->ses_count--; + } spin_lock(&ses->chan_lock); if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7c26ee70c8b0..b85718f32b53 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -256,29 +256,34 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) /* * update the iface for the channel if necessary. - * will return 0 when iface is updated. 1 otherwise + * will return 0 when iface is updated, 1 if removed, 2 otherwise * Must be called with chan_lock held. */ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + unsigned int chan_index; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; int rc = 0; - /* primary channel. This can never go away */ - if (!chan_index) + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (!chan_index) { + spin_unlock(&ses->chan_lock); return 0; + } if (ses->chans[chan_index].iface) { old_iface = ses->chans[chan_index].iface; - if (old_iface->is_active) + if (old_iface->is_active) { + spin_unlock(&ses->chan_lock); return 1; + } } + spin_unlock(&ses->chan_lock); spin_lock(&ses->iface_lock); - /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { if (!iface->is_active || @@ -295,8 +300,6 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } - ses->chans[chan_index].iface = iface; - /* now drop the ref to the current iface */ if (old_iface && iface) { kref_put(&old_iface->refcount, release_iface); @@ -311,14 +314,20 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); } - spin_unlock(&ses->iface_lock); + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + ses->chans[chan_index].iface = iface; + /* No iface is found. if secondary chan, drop connection */ - if (!iface && CIFS_SERVER_IS_CHAN(server)) { - cifs_put_tcp_session(server, false); + if (!iface && CIFS_SERVER_IS_CHAN(server)) ses->chans[chan_index].server = NULL; - } + + spin_unlock(&ses->chan_lock); + + if (!iface && CIFS_SERVER_IS_CHAN(server)) + cifs_put_tcp_session(server, false); return rc; } -- cgit v1.2.3 From 17b1362d49191625440ca2c195959ce0b37ec296 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Fri, 7 Jan 2022 09:51:54 -0500 Subject: MAINTAINERS: Update email address Update my email address in the MAINTAINERS file as the current one will stop functioning in a while. Signed-off-by: Thara Gopinath Reviewed-by: Bjorn Andersson Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 192ba251e987..ffa52d4bdfc5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16489,7 +16489,7 @@ F: Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml F: drivers/cpufreq/qcom-cpufreq-nvmem.c QUALCOMM CRYPTO DRIVERS -M: Thara Gopinath +M: Thara Gopinath L: linux-crypto@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@ -16599,7 +16599,7 @@ F: include/linux/if_rmnet.h QUALCOMM TSENS THERMAL DRIVER M: Amit Kucheria -M: Thara Gopinath +M: Thara Gopinath L: linux-pm@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained -- cgit v1.2.3 From 1ba904b6b16e08de5aed7c1349838d9cd0d178c5 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 11:58:41 +0400 Subject: ARM: cns3xxx: Fix refcount leak in cns3xxx_init of_find_compatible_node() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. Fixes: 415f59142d9d ("ARM: cns3xxx: initial DT support") Signed-off-by: Miaoqian Lin Acked-by: Krzysztof Halasa Signed-off-by: Arnd Bergmann --- arch/arm/mach-cns3xxx/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e4f4b20b83a2..3fc4ec830e3a 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -372,6 +372,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -385,6 +386,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; -- cgit v1.2.3 From 2c629dd2d14fd7f64a553f809eda6d0b3a4f615a Mon Sep 17 00:00:00 2001 From: Liang He Date: Thu, 16 Jun 2022 17:30:27 +0800 Subject: arm: mach-spear: Add missing of_node_put() in time.c In spear_setup_of_timer(), of_find_matching_node() will return a node pointer with refcount incrementd. We should use of_node_put() in each fail path or when it is not used anymore. Signed-off-by: Liang He Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20220616093027.3984903-1-windhl@126.com' Signed-off-by: Arnd Bergmann --- arch/arm/mach-spear/time.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index d1fdb6066f7b..c7c17c0f936c 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -218,13 +218,13 @@ void __init spear_setup_of_timer(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { pr_err("%s: No irq passed for timer via DT\n", __func__); - return; + goto err_put_np; } gpt_base = of_iomap(np, 0); if (!gpt_base) { pr_err("%s: of iomap failed\n", __func__); - return; + goto err_put_np; } gpt_clk = clk_get_sys("gpt0", NULL); @@ -239,6 +239,8 @@ void __init spear_setup_of_timer(void) goto err_prepare_enable_clk; } + of_node_put(np); + spear_clockevent_init(irq); spear_clocksource_init(); @@ -248,4 +250,6 @@ err_prepare_enable_clk: clk_put(gpt_clk); err_iomap: iounmap(gpt_base); +err_put_np: + of_node_put(np); } -- cgit v1.2.3 From 7f058112873e86ca760f2d2b0e1ccc2ab111f418 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Fri, 24 Jun 2022 16:35:11 +0930 Subject: ARM: dts: aspeed: nuvia: rename vendor nuvia to qcom Nuvia has been acquired by Qualcomm and the vendor name 'nuvia' will not be used anymore so rename aspeed-bmc-nuvia-dc-scm.dts to aspeed-bmc-qcom-dc-scm-v1.dts and change 'nuvia' to 'qcom' as its vendor name in the file. Fixes: 7b46aa7c008d ("ARM: dts: aspeed: Add Nuvia DC-SCM BMC") Signed-off-by: Jae Hyun Yoo Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20220523175640.60155-1-quic_jaehyoo@quicinc.com Link: https://lore.kernel.org/r/20220624070511.4070659-1-joel@jms.id.au' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/Makefile | 2 +- arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts | 190 ------------------------ arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts | 190 ++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 191 deletions(-) delete mode 100644 arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts create mode 100644 arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 184899808ee7..5112f493f494 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1586,7 +1586,6 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-lenovo-hr630.dtb \ aspeed-bmc-lenovo-hr855xg2.dtb \ aspeed-bmc-microsoft-olympus.dtb \ - aspeed-bmc-nuvia-dc-scm.dtb \ aspeed-bmc-opp-lanyang.dtb \ aspeed-bmc-opp-mihawk.dtb \ aspeed-bmc-opp-mowgli.dtb \ @@ -1599,6 +1598,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-opp-witherspoon.dtb \ aspeed-bmc-opp-zaius.dtb \ aspeed-bmc-portwell-neptune.dtb \ + aspeed-bmc-qcom-dc-scm-v1.dtb \ aspeed-bmc-quanta-q71l.dtb \ aspeed-bmc-quanta-s6q.dtb \ aspeed-bmc-supermicro-x11spi.dtb \ diff --git a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts b/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts deleted file mode 100644 index f4a97cfb0f23..000000000000 --- a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -// Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. - -/dts-v1/; - -#include "aspeed-g6.dtsi" - -/ { - model = "Nuvia DC-SCM BMC"; - compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600"; - - aliases { - serial4 = &uart5; - }; - - chosen { - stdout-path = &uart5; - bootargs = "console=ttyS4,115200n8"; - }; - - memory@80000000 { - device_type = "memory"; - reg = <0x80000000 0x40000000>; - }; -}; - -&mdio3 { - status = "okay"; - - ethphy3: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - -&mac2 { - status = "okay"; - - /* Bootloader sets up the MAC to insert delay */ - phy-mode = "rgmii"; - phy-handle = <ðphy3>; - - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_rgmii3_default>; -}; - -&mac3 { - status = "okay"; - - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_rmii4_default>; - - use-ncsi; -}; - -&rtc { - status = "okay"; -}; - -&fmc { - status = "okay"; - - flash@0 { - status = "okay"; - m25p,fast-read; - label = "bmc"; - spi-max-frequency = <133000000>; -#include "openbmc-flash-layout-64.dtsi" - }; - - flash@1 { - status = "okay"; - m25p,fast-read; - label = "alt-bmc"; - spi-max-frequency = <133000000>; -#include "openbmc-flash-layout-64-alt.dtsi" - }; -}; - -&spi1 { - status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi1_default>; - - flash@0 { - status = "okay"; - m25p,fast-read; - label = "bios"; - spi-max-frequency = <133000000>; - }; -}; - -&gpio0 { - gpio-line-names = - /*A0-A7*/ "","","","","","","","", - /*B0-B7*/ "BMC_FLASH_MUX_SEL","","","","","","","", - /*C0-C7*/ "","","","","","","","", - /*D0-D7*/ "","","","","","","","", - /*E0-E7*/ "","","","","","","","", - /*F0-F7*/ "","","","","","","","", - /*G0-G7*/ "","","","","","","","", - /*H0-H7*/ "","","","","","","","", - /*I0-I7*/ "","","","","","","","", - /*J0-J7*/ "","","","","","","","", - /*K0-K7*/ "","","","","","","","", - /*L0-L7*/ "","","","","","","","", - /*M0-M7*/ "","","","","","","","", - /*N0-N7*/ "BMC_FWSPI_RST_N","","GPIO_1_BMC_3V3","","","","","", - /*O0-O7*/ "JTAG_MUX_A","JTAG_MUX_B","","","","","","", - /*P0-P7*/ "","","","","","","","", - /*Q0-Q7*/ "","","","","","","","", - /*R0-R7*/ "","","","","","","","", - /*S0-S7*/ "","","","","","","","", - /*T0-T7*/ "","","","","","","","", - /*U0-U7*/ "","","","","","","","", - /*V0-V7*/ "","","","SCMFPGA_SPARE_GPIO1_3V3", - "SCMFPGA_SPARE_GPIO2_3V3","SCMFPGA_SPARE_GPIO3_3V3", - "SCMFPGA_SPARE_GPIO4_3V3","SCMFPGA_SPARE_GPIO5_3V3", - /*W0-W7*/ "","","","","","","","", - /*X0-X7*/ "","","","","","","","", - /*Y0-Y7*/ "","","","","","","","", - /*Z0-Z7*/ "","","","","","","","", - /*AA0-AA7*/ "","","","","","","","", - /*AB0-AB7*/ "","","","","","","","", - /*AC0-AC7*/ "","","","","","","",""; -}; - -&gpio1 { - gpio-line-names = - /*A0-A7*/ "GPI_1_BMC_1V8","","","","","", - "SCMFPGA_SPARE_GPIO1_1V8","SCMFPGA_SPARE_GPIO2_1V8", - /*B0-B7*/ "SCMFPGA_SPARE_GPIO3_1V8","SCMFPGA_SPARE_GPIO4_1V8", - "SCMFPGA_SPARE_GPIO5_1V8","","","","","", - /*C0-C7*/ "","","","","","","","", - /*D0-D7*/ "","BMC_SPI1_RST_N","BIOS_FLASH_MUX_SEL","", - "","TPM2_PIRQ_N","TPM2_RST_N","", - /*E0-E7*/ "","","","","","","",""; -}; - -&i2c2 { - status = "okay"; -}; - -&i2c4 { - status = "okay"; -}; - -&i2c5 { - status = "okay"; -}; - -&i2c6 { - status = "okay"; -}; - -&i2c7 { - status = "okay"; -}; - -&i2c8 { - status = "okay"; -}; - -&i2c9 { - status = "okay"; -}; - -&i2c10 { - status = "okay"; -}; - -&i2c12 { - status = "okay"; -}; - -&i2c13 { - status = "okay"; -}; - -&i2c14 { - status = "okay"; -}; - -&i2c15 { - status = "okay"; -}; - -&vhub { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts new file mode 100644 index 000000000000..259ef3f54c5c --- /dev/null +++ b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + +/dts-v1/; + +#include "aspeed-g6.dtsi" + +/ { + model = "Qualcomm DC-SCM V1 BMC"; + compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600"; + + aliases { + serial4 = &uart5; + }; + + chosen { + stdout-path = &uart5; + bootargs = "console=ttyS4,115200n8"; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x40000000>; + }; +}; + +&mdio3 { + status = "okay"; + + ethphy3: ethernet-phy@1 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <1>; + }; +}; + +&mac2 { + status = "okay"; + + /* Bootloader sets up the MAC to insert delay */ + phy-mode = "rgmii"; + phy-handle = <ðphy3>; + + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rgmii3_default>; +}; + +&mac3 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rmii4_default>; + + use-ncsi; +}; + +&rtc { + status = "okay"; +}; + +&fmc { + status = "okay"; + + flash@0 { + status = "okay"; + m25p,fast-read; + label = "bmc"; + spi-max-frequency = <133000000>; +#include "openbmc-flash-layout-64.dtsi" + }; + + flash@1 { + status = "okay"; + m25p,fast-read; + label = "alt-bmc"; + spi-max-frequency = <133000000>; +#include "openbmc-flash-layout-64-alt.dtsi" + }; +}; + +&spi1 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_spi1_default>; + + flash@0 { + status = "okay"; + m25p,fast-read; + label = "bios"; + spi-max-frequency = <133000000>; + }; +}; + +&gpio0 { + gpio-line-names = + /*A0-A7*/ "","","","","","","","", + /*B0-B7*/ "BMC_FLASH_MUX_SEL","","","","","","","", + /*C0-C7*/ "","","","","","","","", + /*D0-D7*/ "","","","","","","","", + /*E0-E7*/ "","","","","","","","", + /*F0-F7*/ "","","","","","","","", + /*G0-G7*/ "","","","","","","","", + /*H0-H7*/ "","","","","","","","", + /*I0-I7*/ "","","","","","","","", + /*J0-J7*/ "","","","","","","","", + /*K0-K7*/ "","","","","","","","", + /*L0-L7*/ "","","","","","","","", + /*M0-M7*/ "","","","","","","","", + /*N0-N7*/ "BMC_FWSPI_RST_N","","GPIO_1_BMC_3V3","","","","","", + /*O0-O7*/ "JTAG_MUX_A","JTAG_MUX_B","","","","","","", + /*P0-P7*/ "","","","","","","","", + /*Q0-Q7*/ "","","","","","","","", + /*R0-R7*/ "","","","","","","","", + /*S0-S7*/ "","","","","","","","", + /*T0-T7*/ "","","","","","","","", + /*U0-U7*/ "","","","","","","","", + /*V0-V7*/ "","","","SCMFPGA_SPARE_GPIO1_3V3", + "SCMFPGA_SPARE_GPIO2_3V3","SCMFPGA_SPARE_GPIO3_3V3", + "SCMFPGA_SPARE_GPIO4_3V3","SCMFPGA_SPARE_GPIO5_3V3", + /*W0-W7*/ "","","","","","","","", + /*X0-X7*/ "","","","","","","","", + /*Y0-Y7*/ "","","","","","","","", + /*Z0-Z7*/ "","","","","","","","", + /*AA0-AA7*/ "","","","","","","","", + /*AB0-AB7*/ "","","","","","","","", + /*AC0-AC7*/ "","","","","","","",""; +}; + +&gpio1 { + gpio-line-names = + /*A0-A7*/ "GPI_1_BMC_1V8","","","","","", + "SCMFPGA_SPARE_GPIO1_1V8","SCMFPGA_SPARE_GPIO2_1V8", + /*B0-B7*/ "SCMFPGA_SPARE_GPIO3_1V8","SCMFPGA_SPARE_GPIO4_1V8", + "SCMFPGA_SPARE_GPIO5_1V8","","","","","", + /*C0-C7*/ "","","","","","","","", + /*D0-D7*/ "","BMC_SPI1_RST_N","BIOS_FLASH_MUX_SEL","", + "","TPM2_PIRQ_N","TPM2_RST_N","", + /*E0-E7*/ "","","","","","","",""; +}; + +&i2c2 { + status = "okay"; +}; + +&i2c4 { + status = "okay"; +}; + +&i2c5 { + status = "okay"; +}; + +&i2c6 { + status = "okay"; +}; + +&i2c7 { + status = "okay"; +}; + +&i2c8 { + status = "okay"; +}; + +&i2c9 { + status = "okay"; +}; + +&i2c10 { + status = "okay"; +}; + +&i2c12 { + status = "okay"; +}; + +&i2c13 { + status = "okay"; +}; + +&i2c14 { + status = "okay"; +}; + +&i2c15 { + status = "okay"; +}; + +&vhub { + status = "okay"; +}; -- cgit v1.2.3 From 935336c191040809bf5739654ea337c13fe8f9af Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 23 Jun 2022 11:12:31 +0200 Subject: selftests/bpf: Test sockmap update when socket has ULP Cover the scenario when we cannot insert a socket into the sockmap, because it has it is using ULP. Failed insert should not have any effect on the ULP state. This is a regression test. Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220623091231.417138-1-jakub@cloudflare.com Signed-off-by: Jakub Kicinski --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 84 +++++++++++++++++++--- 1 file changed, 75 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index af293ea1542c..e172d89e92e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -4,6 +4,7 @@ * Tests for sockmap/sockhash holding kTLS sockets. */ +#include #include "test_progs.h" #define MAX_TEST_NAME 80 @@ -92,9 +93,78 @@ close_srv: close(srv); } +static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + struct sockaddr_in6 *v6; + struct sockaddr_in *v4; + int err, s, zero = 0; + + switch (family) { + case AF_INET: + v4 = (struct sockaddr_in *)&addr; + v4->sin_family = AF_INET; + break; + case AF_INET6: + v6 = (struct sockaddr_in6 *)&addr; + v6->sin6_family = AF_INET6; + break; + default: + PRINT_FAIL("unsupported socket family %d", family); + return; + } + + s = socket(family, SOCK_STREAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + return; + + err = bind(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + goto close; + + err = getsockname(s, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + goto close; + + err = connect(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "connect")) + goto close; + + /* save sk->sk_prot and set it to tls_prots */ + err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto close; + + /* sockmap update should not affect saved sk_prot */ + err = bpf_map_update_elem(map, &zero, &s, BPF_ANY); + if (!ASSERT_ERR(err, "sockmap update elem")) + goto close; + + /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */ + err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + ASSERT_OK(err, "setsockopt(TCP_NODELAY)"); + +close: + close(s); +} + +static const char *fmt_test_name(const char *subtest_name, int family, + enum bpf_map_type map_type) +{ + const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"; + const char *family_str = AF_INET ? "IPv4" : "IPv6"; + static char test_name[MAX_TEST_NAME]; + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls %s %s %s", + subtest_name, family_str, map_type_str); + + return test_name; +} + static void run_tests(int family, enum bpf_map_type map_type) { - char test_name[MAX_TEST_NAME]; int map; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); @@ -103,14 +173,10 @@ static void run_tests(int family, enum bpf_map_type map_type) return; } - snprintf(test_name, MAX_TEST_NAME, - "sockmap_ktls disconnect_after_delete %s %s", - family == AF_INET ? "IPv4" : "IPv6", - map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); - if (!test__start_subtest(test_name)) - return; - - test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) + test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) + test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } -- cgit v1.2.3 From af3a6d1018f02c6dc8388f1f3785a559c7ab5961 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Jun 2022 15:01:43 -0300 Subject: cifs: update cifs_ses::ip_addr after failover cifs_ses::ip_addr wasn't being updated in cifs_session_setup() when reconnecting SMB sessions thus returning wrong value in /proc/fs/cifs/DebugData. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@kernel.org Signed-off-by: Steve French --- fs/cifs/connect.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d56325915d0..fa29c9aae24b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4025,10 +4025,16 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, struct nls_table *nls_info) { int rc = -ENOSYS; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; bool is_binding = false; - spin_lock(&cifs_tcp_ses_lock); + if (server->dstaddr.ss_family == AF_INET6) + scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr); + else + scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr); + if (ses->ses_status != SES_GOOD && ses->ses_status != SES_NEW && ses->ses_status != SES_NEED_RECON) { -- cgit v1.2.3 From 2990f223ffa7bb25422956b9f79f9176a5b38346 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 21 Jun 2022 09:25:44 +0400 Subject: RDMA/cm: Fix memory leak in ib_cm_insert_listen cm_alloc_id_priv() allocates resource for the cm_id_priv. When cm_init_listen() fails it doesn't free it, leading to memory leak. Add the missing error unwind. Fixes: 98f67156a80f ("RDMA/cm: Simplify establishing a listen cm_id") Link: https://lore.kernel.org/r/20220621052546.4821-1-linmq006@gmail.com Signed-off-by: Miaoqian Lin Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1c107d6d03b9..b985e0d9bc05 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1252,8 +1252,10 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, return ERR_CAST(cm_id_priv); err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); -- cgit v1.2.3 From 6f0012e35160cd08a53e46e3b3bbf724b92dfe68 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 05:04:36 +0000 Subject: tcp: add a missing nf_reset_ct() in 3WHS handling When the third packet of 3WHS connection establishment contains payload, it is added into socket receive queue without the XFRM check and the drop of connection tracking context. This means that if the data is left unread in the socket receive queue, conntrack module can not be unloaded. As most applications usually reads the incoming data immediately after accept(), bug has been hiding for quite a long time. Commit 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists") exposed this bug because even if the application reads this data, the skb with nfct state could stay in a per-cpu cache for an arbitrary time, if said cpu no longer process RX softirqs. Many thanks to Ilya Maximets for reporting this issue, and for testing various patches: https://lore.kernel.org/netdev/20220619003919.394622-1-i.maximets@ovn.org/ Note that I also added a missing xfrm4_policy_check() call, although this is probably not a big issue, as the SYN packet should have been dropped earlier. Fixes: b59c270104f0 ("[NETFILTER]: Keep conntrack reference until IPsec policy checks are done") Reported-by: Ilya Maximets Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Pablo Neira Ayuso Cc: Steffen Klassert Tested-by: Ilya Maximets Reviewed-by: Ilya Maximets Link: https://lore.kernel.org/r/20220623050436.1290307-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe8f23b95d32..da5a3c44c4fb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1964,7 +1964,10 @@ process: struct sock *nsk; sk = req->rsk_listener; - drop_reason = tcp_inbound_md5_hash(sk, skb, + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + drop_reason = SKB_DROP_REASON_XFRM_POLICY; + else + drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, &iph->daddr, AF_INET, dif, sdif); if (unlikely(drop_reason)) { @@ -2016,6 +2019,7 @@ process: } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); -- cgit v1.2.3 From 3b9bc84d311104906d2b4995a9a02d7b7ddab2db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:20:39 -0700 Subject: net: tun: unlink NAPI from device on destruction Syzbot found a race between tun file and device destruction. NAPIs live in struct tun_file which can get destroyed before the netdev so we have to del them explicitly. The current code is missing deleting the NAPI if the queue was detached first. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Reported-by: syzbot+b75c138e9286ac742647@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220623042039.2274708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 87a635aac008..7fd0288c3789 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -727,6 +727,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); -- cgit v1.2.3 From c96614eeab663646f57f67aa591e015abd8bd0ba Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:44 +0200 Subject: net: dp83822: disable false carrier interrupt When unplugging an Ethernet cable, false carrier events were produced by the PHY at a very high rate. Once the false carrier counter full, an interrupt was triggered every few clock cycles until the cable was replugged. This resulted in approximately 10k/s interrupts. Since the false carrier counter (FCSCR) is never used, we can safely disable this interrupt. In addition to improving performance, this also solved MDIO read timeouts I was randomly encountering with an i.MX8 fec MAC because of the interrupt flood. The interrupt count and MDIO timeout fix were tested on a v5.4.110 kernel. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index e6ad3a494d32..95ef507053a6 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -230,7 +230,6 @@ static int dp83822_config_intr(struct phy_device *phydev) return misr_status; misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_FALSE_CARRIER_HF_INT_EN | DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); -- cgit v1.2.3 From 0e597e2affb90d6ea48df6890d882924acf71e19 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Thu, 23 Jun 2022 15:46:45 +0200 Subject: net: dp83822: disable rx error interrupt Some RX errors, notably when disconnecting the cable, increase the RCSR register. Once half full (0x7fff), an interrupt flood is generated. I measured ~3k/s interrupts even after the RX errors transfer was stopped. Since we don't read and clear the RCSR register, we should disable this interrupt. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 95ef507053a6..8549e0e356c9 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -229,8 +229,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; - misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_LINK_STAT_INT_EN | + misr_status |= (DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); -- cgit v1.2.3 From 3b89b511ea0c705cc418440e2abf9d692a556d84 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 16:32:32 +0300 Subject: net: fix IFF_TX_SKB_NO_LINEAR definition The "1<<31" shift has a sign extension bug so IFF_TX_SKB_NO_LINEAR is 0xffffffff80000000 instead of 0x0000000080000000. Fixes: c2ff53d8049f ("net: Add priv_flags for allow tx skb without linear") Signed-off-by: Dan Carpenter Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/YrRrcGttfEVnf85Q@kili Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..2563d30736e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1671,7 +1671,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, - IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; -- cgit v1.2.3 From 501dcbe495c0484a8f7954f1b24d2002dc6cb2d2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Jun 2022 21:56:16 +0800 Subject: LoongArch: Fix the !THP build Fix the !THP build by making pmd_pfn() available in all configurations. Because pmd_pfn() is used in mm/page_vma_mapped.c whether or not THP is configured. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 5dc84d8f18d6..d9e86cfa53e2 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -426,6 +426,11 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ @@ -497,11 +502,6 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; -} - static inline struct page *pmd_page(pmd_t pmd) { if (pmd_trans_huge(pmd)) -- cgit v1.2.3 From 92264f2dae7324f3189d22c0a0f0cb4e5d30d617 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jun 2022 16:55:41 +0800 Subject: LoongArch: Fix the _stext symbol address _stext means the start of .text section (see __is_kernel_text()), but we put its definition in .ref.text by mistake. Fix it by defining it in the vmlinux.lds.S. Signed-off-by: Huacai Chen --- arch/loongarch/kernel/head.S | 2 -- arch/loongarch/kernel/vmlinux.lds.S | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e596dfcd924b..d01e62dd414f 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -14,8 +14,6 @@ __REF -SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) - SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 78311a6101a3..69c76f26c1c5 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -37,6 +37,7 @@ SECTIONS HEAD_TEXT_SECTION . = ALIGN(PECOFF_SEGMENT_ALIGN); + _stext = .; .text : { TEXT_TEXT SCHED_TEXT -- cgit v1.2.3 From bab1c299f3945ffe7934c05f3c50377ca4b291b4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 23 Jun 2022 09:55:42 +0800 Subject: LoongArch: Fix sleeping in atomic context in setup_tlb_handler() Since setup_tlb_handler() is executed in atomic context, we should use GFP_ATOMIC instead of GFP_KERNEL to alloc pages. Otherwise we will get a "sleeping in atomic context" error: [ 0.013118] BUG: sleeping function called from invalid context at mm/page_alloc.c:5158 [ 0.013126] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 [ 0.013131] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19-rc3+ #1008 1a223086d14d07967cc427f15d52139422271360 [ 0.013136] Hardware name: Loongson Loongson-3A5000-7A1000-1w-V0.1-CRB/Loongson-LS3A5000-7A1000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V2.0.04082-beta7 04/27 [ 0.013140] Stack : 90000000015fc990 9000000100493c18 9000000000df3370 9000000100490000 [ 0.013151] 9000000100493b50 0000000000000000 9000000100493b58 9000000001417ef0 [ 0.013160] 900000000199e54e 0000000000000040 9000000100493c18 90000000015f7a98 [ 0.013168] ffffffffffffffff 6de72f8b42179d1e 9000000100403b80 90000000015f7890 [ 0.013176] 0000000000000001 00000000fffff175 9000000000eb9860 9000000001530b4b [ 0.013184] 9000000000e99e60 0000000000000013 0000000006ecc000 0000000000000001 [ 0.013193] 90000000015f7a98 9000000001417ef0 0000000000000004 0000000000000000 [ 0.013201] 0000000000000cc0 0000000000000000 0000000000000001 90000000015fc990 [ 0.013209] 9000000000217e74 9000000001603b6b 9000000000208640 0000000000000000 [ 0.013217] 00000000000000b0 0000000000000004 0000000000000000 0000000000070000 [ 0.013225] ... [ 0.013229] Call Trace: [ 0.013230] [<9000000000208640>] show_stack+0x4c/0x14c [ 0.013240] [<9000000000df3370>] dump_stack_lvl+0x70/0xac [ 0.013246] [<9000000000270c8c>] ___might_sleep+0x104/0x124 [ 0.013253] [<9000000000477e84>] __alloc_pages+0x240/0x464 [ 0.013260] [<9000000000214214>] setup_tlb_handler+0x104/0x1e8 [ 0.013265] [<9000000000214324>] tlb_init+0x2c/0x3c [ 0.013270] [<9000000000208b74>] per_cpu_trap_init+0xec/0x108 [ 0.013275] [<9000000000202850>] cpu_probe+0x400/0x8a4 [ 0.013279] [<900000000020d160>] start_secondary+0x5c/0x3d4 Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index e272f8ac57d1..6d050973241c 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -281,7 +281,7 @@ void setup_tlb_handler(int cpu) if (pcpu_handlers[cpu]) return; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); + page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz)); if (!page) return; -- cgit v1.2.3 From 26808cebf14cdf1d835ae256188ece116d2ab377 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 23 Jun 2022 16:11:54 +0800 Subject: LoongArch: Fix EENTRY/MERRENTRY setting in setup_tlb_handler() setup_tlb_handler() is expected to set per-cpu exception handlers, but it only set the TLBRENTRY successfully because of copy & paste errors, so fix it. Reviewed-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 6d050973241c..9818ce11546b 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -286,10 +286,11 @@ void setup_tlb_handler(int cpu) return; addr = page_address(page); - pcpu_handlers[cpu] = virt_to_phys(addr); + pcpu_handlers[cpu] = (unsigned long)addr; memcpy((void *)addr, (void *)eentry, vec_sz); local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); - csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); } #endif -- cgit v1.2.3 From ad82eef3cebf8cd4f67e20b902e6d02e679e2ef1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 18 Jun 2022 12:50:31 +0800 Subject: LoongArch: Fix wrong fpu version According to the configuration information accessible by the CPUCFG instruction in LoongArch Reference Manual [1], FP_ver is stored in bit [5: 3] of CPUCFG2, the current code to get fpu version is wrong, use CPUCFG2_FPVERS to fix it. [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 6c87ea36b257..529ab8f44ec6 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -263,7 +263,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); - c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; + c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3; c->fpu_csr0 = FPU_CSR_RN; c->fpu_mask = FPU_CSR_RSVD; -- cgit v1.2.3 From ea18d434781105ce61ff3ef7f74c9e51812f0580 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 18 Jun 2022 16:39:11 +0800 Subject: LoongArch: Make compute_return_era() return void compute_return_era() always returns 0, make it return void, and then no need to check its return value for its callers. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/branch.h | 3 +-- arch/loongarch/kernel/traps.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/branch.h b/arch/loongarch/include/asm/branch.h index 3f33c89f35b4..9a133e4c068e 100644 --- a/arch/loongarch/include/asm/branch.h +++ b/arch/loongarch/include/asm/branch.h @@ -12,10 +12,9 @@ static inline unsigned long exception_era(struct pt_regs *regs) return regs->csr_era; } -static inline int compute_return_era(struct pt_regs *regs) +static inline void compute_return_era(struct pt_regs *regs) { regs->csr_era += 4; - return 0; } #endif /* _ASM_BRANCH_H */ diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e4060f84a221..1bf58c65e2bf 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -475,8 +475,7 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs) die_if_kernel("Reserved instruction in kernel code", regs); - if (unlikely(compute_return_era(regs) < 0)) - goto out; + compute_return_era(regs); if (unlikely(get_user(opcode, era) < 0)) { status = SIGSEGV; -- cgit v1.2.3 From 53632ba87d9f302a8d97a11ec2f4f4eec7bb75ea Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 24 Jun 2022 04:11:47 +0900 Subject: kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS (2nd attempt) If CONFIG_TRIM_UNUSED_KSYMS is enabled and the kernel is built from a pristine state, the vmlinux is linked twice. Commit 3fdc7d3fe4c0 ("kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS") explains why this happens, but it did not fix the issue at all. Now I realized I had applied a wrong patch. In v1 patch [1], the autoksyms_recursive target correctly recurses to "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive". In v2 patch [2], I accidentally dropped the diff line, and it recurses to "$(MAKE) -f $(srctree)/Makefile vmlinux". Restore the code I intended in v1. [1]: https://lore.kernel.org/linux-kbuild/1521045861-22418-8-git-send-email-yamada.masahiro@socionext.com/ [2]: https://lore.kernel.org/linux-kbuild/1521166725-24157-8-git-send-email-yamada.masahiro@socionext.com/ Fixes: 3fdc7d3fe4c0 ("kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS") Signed-off-by: Masahiro Yamada Tested-by: Sami Tolvanen Reviewed-by: Nick Desaulniers --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 513c1fbf7888..a3ff166c5035 100644 --- a/Makefile +++ b/Makefile @@ -1141,7 +1141,7 @@ KBUILD_MODULES := 1 autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) -- cgit v1.2.3 From ff139766764675b9df12bcbc8928a02149b7ba95 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 16 Jun 2022 19:57:59 +0000 Subject: kbuild: Ignore __this_module in gen_autoksyms.sh Module object files can contain an undefined reference to __this_module, which isn't resolved until we link the final .ko. The kernel doesn't export this symbol, so ignore it in gen_autoksyms.sh. Signed-off-by: Sami Tolvanen Tested-by: Steve Muckle Reviewed-by: Nick Desaulniers Tested-by: Ramji Jiyani --- scripts/gen_autoksyms.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh index faacf7062122..653fadbad302 100755 --- a/scripts/gen_autoksyms.sh +++ b/scripts/gen_autoksyms.sh @@ -56,4 +56,7 @@ EOT # point addresses. sed -e 's/^\.//' | sort -u | +# Ignore __this_module. It's not an exported symbol, and will be resolved +# when the final .ko's are linked. +grep -v '^__this_module$' | sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$output_file" -- cgit v1.2.3 From d16c5c7c925658ea94689dfe4469441f7fd59f00 Mon Sep 17 00:00:00 2001 From: Jiang Jian Date: Tue, 21 Jun 2022 14:38:23 +0800 Subject: parisc: align '*' in comment in math-emu code Signed-off-by: Jiang Jian Reviewed-by: Kees Cook Signed-off-by: Helge Deller --- arch/parisc/math-emu/decode_exc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c index 494ca41df05d..d41ddb3430b5 100644 --- a/arch/parisc/math-emu/decode_exc.c +++ b/arch/parisc/math-emu/decode_exc.c @@ -102,7 +102,7 @@ decode_fpu(unsigned int Fpu_register[], unsigned int trap_counts[]) * that happen. Want to keep this overhead low, but still provide * some information to the customer. All exits from this routine * need to restore Fpu_register[0] - */ + */ bflags=(Fpu_register[0] & 0xf8000000); Fpu_register[0] &= 0x07ffffff; -- cgit v1.2.3 From 067baa9a37b32b95fdeabccde4b0cb6a2cf95f96 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 25 Jun 2022 13:01:08 +0200 Subject: ksmbd: use vfs_llseek instead of dereferencing NULL By not checking whether llseek is NULL, this might jump to NULL. Also, it doesn't check FMODE_LSEEK. Fix this by using vfs_llseek(), which always does the right thing. Fixes: f44158485826 ("cifsd: add file operations") Cc: stable@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: Ronnie Sahlberg Cc: Hyunchul Lee Cc: Sergey Senozhatsky Reviewed-by: Namjae Jeon Acked-by: Al Viro Signed-off-by: Jason A. Donenfeld Signed-off-by: Steve French --- fs/ksmbd/vfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index f194bf764f9f..5d185564aef6 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1048,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, *out_count = 0; end = start + length; while (start < end && *out_count < in_count) { - extent_start = f->f_op->llseek(f, start, SEEK_DATA); + extent_start = vfs_llseek(f, start, SEEK_DATA); if (extent_start < 0) { if (extent_start != -ENXIO) ret = (int)extent_start; @@ -1058,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, if (extent_start >= end) break; - extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); + extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); if (extent_end < 0) { if (extent_end != -ENXIO) ret = (int)extent_end; -- cgit v1.2.3 From e9ed22e6e5010997a2f922eef61ca797d0a2a246 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 18 Jun 2022 15:14:34 +0000 Subject: parisc: Fix flush_anon_page on PA8800/PA8900 Anonymous pages are allocated with the shared mappings colouring, SHM_COLOUR. Since the alias boundary on machines with PA8800 and PA8900 processors is unknown, flush_user_cache_page() might not flush all mappings of a shared anonymous page. Flushing the whole data cache flushes all mappings. This won't fix all coherency issues with shared mappings but it seems to work well in practice. I haven't seen any random memory faults in almost a month on a rp3440 running as a debian buildd machine. There is a small preformance hit. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.18+ --- arch/parisc/kernel/cache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c8a11fcecf4c..a9bc578e4c52 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -722,7 +722,10 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon return; if (parisc_requires_coherency()) { - flush_user_cache_page(vma, vmaddr); + if (vma->vm_flags & VM_SHARED) + flush_data_cache(); + else + flush_user_cache_page(vma, vmaddr); return; } -- cgit v1.2.3 From 0a1355db36718178becd2bfe728a023933d73123 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 26 Jun 2022 11:50:43 +0200 Subject: parisc: Enable ARCH_HAS_STRICT_MODULE_RWX Fix a boot crash on a c8000 machine as reported by Dave. Basically it changes patch_map() to return an alias mapping to the to-be-patched code in order to prevent writing to write-protected memory. Signed-off-by: Helge Deller Suggested-by: John David Anglin Cc: stable@vger.kernel.org # v5.2+ Link: https://lore.kernel.org/all/e8ec39e8-25f8-e6b4-b7ed-4cb23efc756e@bell.net/ --- arch/parisc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5f2448dc5a2b..fa400055b2d5 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,6 +10,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN -- cgit v1.2.3 From 342cb0d80613719c5e5ac30619e54b9a3fd83625 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 Jun 2022 13:39:04 +0300 Subject: perf inject: Fix missing free in copy_kcore_dir() Free string allocated by asprintf(). Fixes: d8fc08550929bb84 ("perf inject: Keep a copy of kcore_dir") Signed-off-by: Adrian Hunter Cc: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220620103904.7960-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a75bf11585b5..063f74f5b8db 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -891,7 +891,9 @@ static int copy_kcore_dir(struct perf_inject *inject) if (ret < 0) return ret; pr_debug("%s\n", cmd); - return system(cmd); + ret = system(cmd); + free(cmd); + return ret; } static int output_fd(struct perf_inject *inject) -- cgit v1.2.3 From 0fdd435cb4f873b5602913db4f2ba497a5443daf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Nov 2021 11:08:31 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick up the changes in: ecf8eca51f33dbfd ("drm/i915/xehp: Add compute engine ABI") 991b4de3275728fd ("drm/i915/uapi: Add kerneldoc for engine class enum") c94fde8f516610b0 ("drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES") 1c671ad753dbbf5f ("drm/i915/doc: Link query items to their uapi structs") a2e5402691e23269 ("drm/i915/doc: Convert perf UAPI comments to kerneldoc") 462ac1cdf4d7acf1 ("drm/i915/doc: Convert drm_i915_query_topology_info comment to kerneldoc") 034d47b25b2ce627 ("drm/i915/uapi: Document DRM_I915_QUERY_HWCONFIG_BLOB") 78e1fb3112c0ac44 ("drm/i915/uapi: Add query for hwconfig blob") That don't add any new ioctl, so no changes in tooling. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: John Harrison Cc: Matt Atwood Cc: Matt Roper Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Link: http://lore.kernel.org/lkml/YrDi4ALYjv9Mdocq@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 353 +++++++++++++++++++++++++++++--------- 1 file changed, 272 insertions(+), 81 deletions(-) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 05c3642aaece..a2def7b27009 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -154,25 +154,77 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; -/* +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * * Different engines serve different roles, and there may be more than one - * engine serving each role. enum drm_i915_gem_engine_class provides a - * classification of the role of the engine, which may be used when requesting - * operations to be performed on a certain subset of engines, or for providing - * information about that group. + * engine serving each role. This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. */ enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. + * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - /* should be kept compact */ + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + + /* Values in this enum should be kept compact. */ + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. + */ I915_ENGINE_CLASS_INVALID = -1 }; -/* +/** + * struct i915_engine_class_instance - Engine class/instance identifier + * * There may be more than one engine fulfilling any role within the system. * Each engine of a class is given a unique instance number and therefore * any engine can be specified by its class:instance tuplet. APIs that allow @@ -180,10 +232,21 @@ enum drm_i915_gem_engine_class { * for this identification. */ struct i915_engine_class_instance { - __u16 engine_class; /* see enum drm_i915_gem_engine_class */ - __u16 engine_instance; + /** + * @engine_class: + * + * Engine class from enum drm_i915_gem_engine_class + */ + __u16 engine_class; #define I915_ENGINE_CLASS_INVALID_NONE -1 #define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 + + /** + * @engine_instance: + * + * Engine instance. + */ + __u16 engine_instance; }; /** @@ -2657,24 +2720,65 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; -/* +/** + * struct drm_i915_perf_oa_config + * * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { - /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + /** + * @uuid: + * + * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" + */ char uuid[36]; + /** + * @n_mux_regs: + * + * Number of mux regs in &mux_regs_ptr. + */ __u32 n_mux_regs; + + /** + * @n_boolean_regs: + * + * Number of boolean regs in &boolean_regs_ptr. + */ __u32 n_boolean_regs; + + /** + * @n_flex_regs: + * + * Number of flex regs in &flex_regs_ptr. + */ __u32 n_flex_regs; - /* - * These fields are pointers to tuples of u32 values (register address, - * value). For example the expected length of the buffer pointed by - * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + /** + * @mux_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_mux_regs). */ __u64 mux_regs_ptr; + + /** + * @boolean_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_boolean_regs). + */ __u64 boolean_regs_ptr; + + /** + * @flex_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_flex_regs). + */ __u64 flex_regs_ptr; }; @@ -2685,12 +2789,24 @@ struct drm_i915_perf_oa_config { * @data_ptr is also depends on the specific @query_id. */ struct drm_i915_query_item { - /** @query_id: The id for this query */ + /** + * @query_id: + * + * The id for this query. Currently accepted query IDs are: + * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) + * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config) + * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) + * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) + * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + */ __u64 query_id; -#define DRM_I915_QUERY_TOPOLOGY_INFO 1 -#define DRM_I915_QUERY_ENGINE_INFO 2 -#define DRM_I915_QUERY_PERF_CONFIG 3 -#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_BLOB 5 +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 /* Must be kept compact -- no holes and well documented */ /** @@ -2706,14 +2822,17 @@ struct drm_i915_query_item { /** * @flags: * - * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. * - * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the + * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the * following: * - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_LIST + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * + * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain + * a struct i915_engine_class_instance that references a render engine. */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 @@ -2771,66 +2890,112 @@ struct drm_i915_query { __u64 items_ptr; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : - * - * data: contains the 3 pieces of information : - * - * - the slice mask with one bit per slice telling whether a slice is - * available. The availability of slice X can be queried with the following - * formula : - * - * (data[X / 8] >> (X % 8)) & 1 - * - * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. Gen12 has dual-subslices, which are - * similar to two gen11 subslices. For gen12, this array represents dual- - * subslices. The availability of subslice Y in slice X can be queried - * with the following formula : - * - * (data[subslice_offset + - * X * subslice_stride + - * Y / 8] >> (Y % 8)) & 1 - * - * - the EU mask for each subslice in each slice with one bit per EU telling - * whether an EU is available. The availability of EU Z in subslice Y in - * slice X can be queried with the following formula : +/** + * struct drm_i915_query_topology_info * - * (data[eu_offset + - * (X * max_subslices + Y) * eu_stride + - * Z / 8] >> (Z % 8)) & 1 + * Describes slice/subslice/EU information queried by + * %DRM_I915_QUERY_TOPOLOGY_INFO */ struct drm_i915_query_topology_info { - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u16 flags; + /** + * @max_slices: + * + * The number of bits used to express the slice mask. + */ __u16 max_slices; + + /** + * @max_subslices: + * + * The number of bits used to express the subslice mask. + */ __u16 max_subslices; + + /** + * @max_eus_per_subslice: + * + * The number of bits in the EU mask that correspond to a single + * subslice's EUs. + */ __u16 max_eus_per_subslice; - /* + /** + * @subslice_offset: + * * Offset in data[] at which the subslice masks are stored. */ __u16 subslice_offset; - /* + /** + * @subslice_stride: + * * Stride at which each of the subslice masks for each slice are * stored. */ __u16 subslice_stride; - /* + /** + * @eu_offset: + * * Offset in data[] at which the EU masks are stored. */ __u16 eu_offset; - /* + /** + * @eu_stride: + * * Stride at which each of the EU masks for each subslice are stored. */ __u16 eu_stride; + /** + * @data: + * + * Contains 3 pieces of information : + * + * - The slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the + * following formula : + * + * .. code:: c + * + * (data[X / 8] >> (X % 8)) & 1 + * + * Starting with Xe_HP platforms, Intel hardware no longer has + * traditional slices so i915 will always report a single slice + * (hardcoded slicemask = 0x1) which contains all of the platform's + * subslices. I.e., the mask here does not reflect any of the newer + * hardware concepts such as "gslices" or "cslices" since userspace + * is capable of inferring those from the subslice mask. + * + * - The subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. Starting with Gen12 we use the + * term "subslice" to refer to what the hardware documentation + * describes as a "dual-subslices." The availability of subslice Y + * in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1 + * + * - The EU mask for each subslice in each slice, with one bit per EU + * telling whether an EU is available. The availability of EU Z in + * subslice Y in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8 + * ] >> (Z % 8)) & 1 + */ __u8 data[]; }; @@ -2951,52 +3116,68 @@ struct drm_i915_query_engine_info { struct drm_i915_engine_info engines[]; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. +/** + * struct drm_i915_query_perf_config + * + * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and + * %DRM_I915_QUERY_GEOMETRY_SUBSLICES. */ struct drm_i915_query_perf_config { union { - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets - * this fields to the number of configurations available. + /** + * @n_configs: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to + * the number of configurations available. */ __u64 n_configs; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @config: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. */ __u64 config; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @uuid: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. * * String formatted like "%08x-%04x-%04x-%04x-%012x" */ char uuid[36]; }; - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will - * write an array of __u64 of configuration identifiers. + /** + * @data: * - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will - * write a struct drm_i915_perf_oa_config. If the following fields of - * drm_i915_perf_oa_config are set not set to 0, i915 will write into - * the associated pointers the values of submitted when the + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST, + * i915 will write an array of __u64 of configuration identifiers. + * + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA, + * i915 will write a struct drm_i915_perf_oa_config. If the following + * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will + * write into the associated pointers the values of submitted when the * configuration was created : * - * - n_mux_regs - * - n_boolean_regs - * - n_flex_regs + * - &drm_i915_perf_oa_config.n_mux_regs + * - &drm_i915_perf_oa_config.n_boolean_regs + * - &drm_i915_perf_oa_config.n_flex_regs */ __u8 data[]; }; @@ -3134,6 +3315,16 @@ struct drm_i915_query_memory_regions { struct drm_i915_memory_region_info regions[]; }; +/** + * DOC: GuC HWCONFIG blob uAPI + * + * The GuC produces a blob with information about the current device. + * i915 reads this blob from GuC and makes it available via this uAPI. + * + * The format and meaning of the blob content are documented in the + * Programmer's Reference Manual. + */ + /** * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added * extension support using struct i915_user_extension. -- cgit v1.2.3 From 4b3f7644ae84bcf785cc88d35327227cb2fb6b82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:39:15 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes from: d6d0c7f681fda1d0 ("x86/cpufeatures: Add PerfMonV2 feature bit") 296d5a17e793956f ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts") f30903394eb62316 ("x86/cpufeatures: Add virtual TSC_AUX feature bit") 8ad7e8f696951f19 ("x86/fpu/xsave: Support XSAVEC in the kernel") 59bd54a84d15e933 ("x86/tdx: Detect running as a TDX guest in early boot") a77d41ac3a0f41c8 ("x86/cpufeatures: Add AMD Fam19h Branch Sampling feature") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Peter Zijlstra Cc: Sandipan Das Cc: Babu Moger Cc: Paolo Bonzini Cc: Thomas Gleixner Cc: Dave Hansen Cc: Kuppuswamy Sathyanarayanan Cc: Stephane Eranian Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/YrDkgmwhLv+nKeOo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 7 +++++-- tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e17de69faa54..03acc823838a 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -/* FREE! ( 7*32+10) */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -/* FREE! ( 7*32+20) */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -238,6 +238,7 @@ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ #define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -315,6 +316,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -405,6 +407,7 @@ #define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ /* diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 1ae0fab7d902..36369e76cc63 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) #endif +#ifdef CONFIG_INTEL_TDX_GUEST +# define DISABLE_TDX_GUEST 0 +#else +# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -73,7 +79,7 @@ #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 0 +#define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 -- cgit v1.2.3 From ab66fdace8581ef3b4e7cf5381a168ed4058d779 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Jun 2022 15:51:44 +0300 Subject: perf build-id: Fix caching files with a wrong build ID Build ID events associate a file name with a build ID. However, when using perf inject, there is no guarantee that the file on the current machine at the current time has that build ID. Fix by comparing the build IDs and skip adding to the cache if they are different. Example: $ echo "int main() {return 0;}" > prog.c $ gcc -o prog prog.c $ perf record --buildid-all ./prog [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data ] $ file-buildid() { file $1 | awk -F= '{print $2}' | awk -F, '{print $1}' ; } $ file-buildid prog 444ad9be165d8058a48ce2ffb4e9f55854a3293e $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf 444ad9be165d8058a48ce2ffb4e9f55854a3293e $ echo "int main() {return 1;}" > prog.c $ gcc -o prog prog.c $ file-buildid prog 885524d5aaa24008a3e2b06caa3ea95d013c0fc5 Before: $ perf buildid-cache --purge $(pwd)/prog $ perf inject -i perf.data -o junk $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf 885524d5aaa24008a3e2b06caa3ea95d013c0fc5 $ After: $ perf buildid-cache --purge $(pwd)/prog $ perf inject -i perf.data -o junk $ file-buildid ~/.debug/$(pwd)/prog/444ad9be165d8058a48ce2ffb4e9f55854a3293e/elf $ Fixes: 454c407ec17a0c63 ("perf: add perf-inject builtin") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Zanussi Link: https://lore.kernel.org/r/20220621125144.5623-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 82f3d46bea70..328668f38c69 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -872,6 +872,30 @@ out_free: return err; } +static int filename__read_build_id_ns(const char *filename, + struct build_id *bid, + struct nsinfo *nsi) +{ + struct nscookie nsc; + int ret; + + nsinfo__mountns_enter(nsi, &nsc); + ret = filename__read_build_id(filename, bid); + nsinfo__mountns_exit(&nsc); + + return ret; +} + +static bool dso__build_id_mismatch(struct dso *dso, const char *name) +{ + struct build_id bid; + + if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0) + return false; + + return !dso__build_id_equal(dso, &bid); +} + static int dso__cache_build_id(struct dso *dso, struct machine *machine, void *priv __maybe_unused) { @@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, is_kallsyms = true; name = machine->mmap_name; } + + if (!is_kallsyms && dso__build_id_mismatch(dso, name)) + return 0; + return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, is_kallsyms, is_vdso); } -- cgit v1.2.3 From 3713e2494b6ab98f0476bb575b22323775f7d77a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Jun 2022 12:34:37 -0300 Subject: perf trace beauty: Fix generation of errno id->str table on ALT Linux For some reason using: cat < Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/arch_errno_names.sh | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index 2c5f72fa8108..37c53bac5f56 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -33,23 +33,13 @@ create_errno_lookup_func() local arch=$(arch_string "$1") local nr name - cat < Date: Tue, 21 Jun 2022 15:27:25 +0000 Subject: perf inject: Adjust output data offset for backward compatibility When 'perf inject' creates a new file, it reuses the data offset from the input file. If there has been a change on the size of the header, as happened in v5.12 -> v5.13, the new offsets will be wrong, resulting in a corrupted output file. This change adds the function perf_session__data_offset to compute the data offset based on the current header size, and uses that instead of the offset from the original input file. Signed-off-by: Raul Silvera Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Colin Ian King Cc: Dave Marchevsky Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220621152725.2668041-1-rsilvera@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 +- tools/perf/util/header.c | 14 ++++++++++++++ tools/perf/util/header.h | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 063f74f5b8db..54d4e508a092 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -918,7 +918,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.tracing_data = perf_event__repipe_tracing_data; } - output_data_offset = session->header.data_offset; + output_data_offset = perf_session__data_offset(session->evlist); if (inject->build_id_all) { inject->tool.mmap = perf_event__repipe_buildid_mmap; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 53332da100e8..6ad629db63b7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3686,6 +3686,20 @@ int perf_session__write_header(struct perf_session *session, return perf_session__do_write_header(session, evlist, fd, at_exit, NULL); } +size_t perf_session__data_offset(const struct evlist *evlist) +{ + struct evsel *evsel; + size_t data_offset; + + data_offset = sizeof(struct perf_file_header); + evlist__for_each_entry(evlist, evsel) { + data_offset += evsel->core.ids * sizeof(u64); + } + data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr); + + return data_offset; +} + int perf_session__inject_header(struct perf_session *session, struct evlist *evlist, int fd, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 08563c1f1bff..56916dabce7b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -136,6 +136,8 @@ int perf_session__inject_header(struct perf_session *session, int fd, struct feat_copier *fc); +size_t perf_session__data_offset(const struct evlist *evlist); + void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool perf_header__has_feat(const struct perf_header *header, int feat); -- cgit v1.2.3 From 448ce0e6ea93ae99e0b36055e5f5a3f723fe3665 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 22 Jun 2022 11:00:37 +0800 Subject: perf stat: Enable ignore_missing_thread perf already support ignore_missing_thread for -p, but not yet applied to `perf stat -p `. This patch enables ignore_missing_thread for `perf stat -p `. Committer notes: And here is a refresher about the 'ignore_missing_thread' knob, from a previous patch using it: ca8000684ec4e66f ("perf evsel: Enable ignore_missing_thread for pid option") --- While monitoring a multithread process with pid option, perf sometimes may return sys_perf_event_open failure with 3(No such process) if any of the process's threads die before we open the event. However, we want perf continue monitoring the remaining threads and do not exit with error. --- Signed-off-by: Gang Li Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220622030037.15005-1-ligang.bdlg@bytedance.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4ce87a8eb7d7..d2ecd4d29624 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2586,6 +2586,8 @@ int cmd_stat(int argc, const char **argv) if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack)) goto out; + /* Enable ignoring missing threads when -p option is defined. */ + evlist__first(evsel_list)->ignore_missing_thread = target.pid; status = 0; for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { if (stat_config.run_count != 1 && verbose > 0) -- cgit v1.2.3 From e2213a2dc63e1b2941728a9a938c2196548e980f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 09:12:55 -0300 Subject: tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: 84d7c8fd3aade2fe ("vhost-vdpa: introduce uAPI to set group ASID") 2d1fcb7758e49fd9 ("vhost-vdpa: uAPI to get virtqueue group id") a0c95f201170bd55 ("vhost-vdpa: introduce uAPI to get the number of address spaces") 3ace88bd37436abc ("vhost-vdpa: introduce uAPI to get the number of virtqueue groups") 175d493c3c3e09a3 ("vhost: move the backend feature bits to vhost_types.h") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h To pick up these changes and support them: $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2022-06-26 12:04:35.982003781 -0300 +++ after 2022-06-26 12:04:43.819972476 -0300 @@ -28,6 +28,7 @@ [0x74] = "VDPA_SET_CONFIG", [0x75] = "VDPA_SET_VRING_ENABLE", [0x77] = "VDPA_SET_CONFIG_CALL", + [0x7C] = "VDPA_SET_GROUP_ASID", }; static const char *vhost_virtio_ioctl_read_cmds[] = { [0x00] = "GET_FEATURES", @@ -39,5 +40,8 @@ [0x76] = "VDPA_GET_VRING_NUM", [0x78] = "VDPA_GET_IOVA_RANGE", [0x79] = "VDPA_GET_CONFIG_SIZE", + [0x7A] = "VDPA_GET_AS_NUM", + [0x7B] = "VDPA_GET_VRING_GROUP", [0x80] = "VDPA_GET_VQS_COUNT", + [0x81] = "VDPA_GET_GROUP_NUM", }; $ Cc: Adrian Hunter Cc: Gautam Dawar Cc: Jiri Olsa Cc: Michael S. Tsirkin Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Yrh3xMYbfeAD0MFL@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 5d99e7c242a2..cab645d4a645 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -89,11 +89,6 @@ /* Set or get vhost backend capability */ -/* Use message type V2 */ -#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 -/* IOTLB can accept batching hints */ -#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 - #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) @@ -150,11 +145,30 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) - /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) /* Get the count of all virtqueues */ #define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + +/* Get the number of address spaces. */ +#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +/* Get the group for a virtqueue: read index, write group in num, + * The virtqueue index is stored in the index field of + * vhost_vring_state. The group for this specific virtqueue is + * returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ + struct vhost_vring_state) +/* Set the ASID for a virtqueue group. The group index is stored in + * the index field of vhost_vring_state, the ASID associated with this + * group is stored at num field of vhost_vring_state. + */ +#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ + struct vhost_vring_state) + #endif -- cgit v1.2.3 From f8d866194082e703c86751cceb07f6243cde96d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 20:04:45 -0300 Subject: tools headers UAPI: Synch KVM's svm.h header with the kernel To pick up the changes from: d5af44dde5461d12 ("x86/sev: Provide support for SNP guest request NAEs") 0afb6b660a6b58cb ("x86/sev: Use SEV-SNP AP creation to start secondary CPUs") dc3f3d2474b80eae ("x86/mm: Validate memory when changing the C-bit") cbd3d4f7c4e5a93e ("x86/sev: Check SEV-SNP features support") That gets these new SVM exit reasons: + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ Addressing this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h This causes these changes: CC /tmp/build/perf-urgent/arch/x86/util/kvm-stat.o LD /tmp/build/perf-urgent/arch/x86/util/perf-in.o LD /tmp/build/perf-urgent/arch/x86/perf-in.o LD /tmp/build/perf-urgent/arch/perf-in.o LD /tmp/build/perf-urgent/perf-in.o LINK /tmp/build/perf-urgent/perf Cc: Adrian Hunter Cc: Borislav Petkov Cc: Brijesh Singh Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index efa969325ede..f69c168391aa 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -108,6 +108,14 @@ #define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 +#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 +#define SVM_VMGEXIT_AP_CREATE 1 +#define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ @@ -218,6 +226,11 @@ { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } -- cgit v1.2.3 From 03c765b0e3b4cb5063276b086c76f7a612856a9a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Jun 2022 14:22:10 -0700 Subject: Linux 5.19-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3ff166c5035..8973b285ce6c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Superb Owl # *DOCUMENTATION* -- cgit v1.2.3 From d63eae6747eb8b3192e89712f6553c6aa162f872 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Tue, 14 Jun 2022 17:27:51 -0700 Subject: platform/x86: intel/pmc: Add Alder Lake N support to PMC core driver Add Alder Lake N (ADL-N) to the list of the platforms that Intel's PMC core driver supports. Alder Lake N reuses all the TigerLake PCH IPs. Cc: Srinivas Pandruvada Cc: Andy Shevchenko Cc: David E. Box Signed-off-by: Gayatri Kammela Reviewed-by: Rajneesh Bhardwaj Link: https://lore.kernel.org/r/20220615002751.3371730-1-gayatri.kammela@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 40183bda7894..a1fe1e0dcf4a 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1911,6 +1911,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &icl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map), {} -- cgit v1.2.3 From 79e90ca02d7d45898c52fd8523cd4a2a304e409c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Jun 2022 11:11:07 +0300 Subject: platform/mellanox: nvsw-sn2201: fix error code in nvsw_sn2201_create_static_devices() This should return PTR_ERR() instead of IS_ERR(). Also "dev->client" has been set to NULL by this point so it returns 0/success so preserve the error code earlier. Fixes: 662f24826f95 ("platform/mellanox: Add support for new SN2201 system") Signed-off-by: Dan Carpenter Acked-by: Michael Shych Link: https://lore.kernel.org/r/YqmUGwmPK7cPolk/@kili Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/nvsw-sn2201.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index 2923daf63b75..7b9c107c17ce 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -890,6 +890,7 @@ nvsw_sn2201_create_static_devices(struct nvsw_sn2201 *nvsw_sn2201, int size) { struct mlxreg_hotplug_device *dev = devs; + int ret; int i; /* Create I2C static devices. */ @@ -901,6 +902,7 @@ nvsw_sn2201_create_static_devices(struct nvsw_sn2201 *nvsw_sn2201, dev->nr, dev->brdinfo->addr); dev->adapter = NULL; + ret = PTR_ERR(dev->client); goto fail_create_static_devices; } } @@ -914,7 +916,7 @@ fail_create_static_devices: dev->client = NULL; dev->adapter = NULL; } - return IS_ERR(dev->client); + return ret; } static void nvsw_sn2201_destroy_static_devices(struct nvsw_sn2201 *nvsw_sn2201, -- cgit v1.2.3 From d2f33f0c3ad7b0d5262d9b986f1353265fad7a08 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 21 Jun 2022 15:55:11 +0200 Subject: platform/x86: thinkpad_acpi: Fix a memory leak of EFCH MMIO resource Unlike release_mem_region(), a call to release_resource() does not free the resource, so it has to be freed explicitly to avoid a memory leak. Signed-off-by: Jean Delvare Fixes: 455cd867b85b ("platform/x86: thinkpad_acpi: Add a s2idle resume quirk for a number of laptops") Cc: Mario Limonciello Cc: Henrique de Moraes Holschuh Cc: Hans de Goede Cc: Mark Gross Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20220621155511.5b266395@endymion.delvare Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e6cb4a14cdd4..aa6ffeaa3932 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4529,6 +4529,7 @@ static void thinkpad_acpi_amd_s2idle_restore(void) iounmap(addr); cleanup_resource: release_resource(res); + kfree(res); } static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = { -- cgit v1.2.3 From 853a7614880231747040cada91d2b8d2e995c51a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Jun 2022 15:30:20 +0000 Subject: tunnels: do not assume mac header is set in skb_tunnel_check_pmtu() Recently added debug in commit f9aefd6b2aa3 ("net: warn if mac header was not set") caught a bug in skb_tunnel_check_pmtu(), as shown in this syzbot report [1]. In ndo_start_xmit() paths, there is really no need to use skb->mac_header, because skb->data is supposed to point at it. [1] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_mac_header_len include/linux/skbuff.h:2784 [inline] WARNING: CPU: 1 PID: 8604 at include/linux/skbuff.h:2784 skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Modules linked in: CPU: 1 PID: 8604 Comm: syz-executor.3 Not tainted 5.19.0-rc2-syzkaller-00443-g8720bd951b8e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_mac_header_len include/linux/skbuff.h:2784 [inline] RIP: 0010:skb_tunnel_check_pmtu+0x5de/0x2f90 net/ipv4/ip_tunnel_core.c:413 Code: 00 00 00 00 fc ff df 4c 89 fa 48 c1 ea 03 80 3c 02 00 0f 84 b9 fe ff ff 4c 89 ff e8 7c 0f d7 f9 e9 ac fe ff ff e8 c2 13 8a f9 <0f> 0b e9 28 fc ff ff e8 b6 13 8a f9 48 8b 54 24 70 48 b8 00 00 00 RSP: 0018:ffffc90002e4f520 EFLAGS: 00010212 RAX: 0000000000000324 RBX: ffff88804d5fd500 RCX: ffffc90005b52000 RDX: 0000000000040000 RSI: ffffffff87f05e3e RDI: 0000000000000003 RBP: ffffc90002e4f650 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: 000000000000ffff R13: 0000000000000000 R14: 000000000000ffcd R15: 000000000000001f FS: 00007f3babba9700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 0000000075319000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: geneve_xmit_skb drivers/net/geneve.c:927 [inline] geneve_xmit+0xcf8/0x35d0 drivers/net/geneve.c:1107 __netdev_start_xmit include/linux/netdevice.h:4805 [inline] netdev_start_xmit include/linux/netdevice.h:4819 [inline] __dev_direct_xmit+0x500/0x730 net/core/dev.c:4309 dev_direct_xmit include/linux/netdevice.h:3007 [inline] packet_direct_xmit+0x1b8/0x2c0 net/packet/af_packet.c:282 packet_snd net/packet/af_packet.c:3073 [inline] packet_sendmsg+0x21f4/0x55d0 net/packet/af_packet.c:3104 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2489 ___sys_sendmsg+0xf3/0x170 net/socket.c:2543 __sys_sendmsg net/socket.c:2572 [inline] __do_sys_sendmsg net/socket.c:2581 [inline] __se_sys_sendmsg net/socket.c:2579 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3baaa89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3babba9168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f3baab9bf60 RCX: 00007f3baaa89109 RDX: 0000000000000000 RSI: 0000000020000a00 RDI: 0000000000000003 RBP: 00007f3baaae305d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe74f2543f R14: 00007f3babba9300 R15: 0000000000022000 Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Stefano Brivio Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6b2dc7b2b612..cc1caab4a654 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || - (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); -- cgit v1.2.3 From cb8092d70a6f5f01ec1490fce4d35efed3ed996c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 24 Jun 2022 12:24:31 -0400 Subject: tipc: move bc link creation back to tipc_node_create Shuang Li reported a NULL pointer dereference crash: [] BUG: kernel NULL pointer dereference, address: 0000000000000068 [] RIP: 0010:tipc_link_is_up+0x5/0x10 [tipc] [] Call Trace: [] [] tipc_bcast_rcv+0xa2/0x190 [tipc] [] tipc_node_bc_rcv+0x8b/0x200 [tipc] [] tipc_rcv+0x3af/0x5b0 [tipc] [] tipc_udp_recv+0xc7/0x1e0 [tipc] It was caused by the 'l' passed into tipc_bcast_rcv() is NULL. When it creates a node in tipc_node_check_dest(), after inserting the new node into hashtable in tipc_node_create(), it creates the bc link. However, there is a gap between this insert and bc link creation, a bc packet may come in and get the node from the hashtable then try to dereference its bc link, which is NULL. This patch is to fix it by moving the bc link creation before inserting into the hashtable. Note that for a preliminary node becoming "real", the bc link creation should also be called before it's rehashed, as we don't create it for preliminary nodes. Fixes: 4cbf8ac2fe5a ("tipc: enable creating a "preliminary" node") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6ef95ce565bd..b48d97cbbe29 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; unsigned long intv; int bearer_id; int i; @@ -488,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); @@ -567,7 +577,16 @@ update: n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - n->bc_entry.link = NULL; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n); + n = NULL; + goto exit; + } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ @@ -1155,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l, *snd_l; + struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1175,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); - if (unlikely(!n->bc_entry.link)) { - snd_l = tipc_bc_sndlink(net); - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, peer_id, U16_MAX, - tipc_link_min_win(snd_l), - tipc_link_max_win(snd_l), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, snd_l, - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no mem\n"); - tipc_node_write_unlock_fast(n); - tipc_node_put(n); - return; - } - } le = &n->links[b->identity]; -- cgit v1.2.3 From 50c0ada627f56c92f5953a8bf9158b045ad026a1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 Jun 2022 15:29:49 +0800 Subject: virtio-net: fix race between ndo_open() and virtio_device_ready() We currently call virtio_device_ready() after netdev registration. Since ndo_open() can be called immediately after register_netdev, this means there exists a race between ndo_open() and virtio_device_ready(): the driver may start to use the device before DRIVER_OK which violates the spec. Fix this by switching to use register_netdevice() and protect the virtio_device_ready() with rtnl_lock() to make sure ndo_open() can only be called after virtio_device_ready(). Fixes: 4baf1e33d0842 ("virtio_net: enable VQs early") Signed-off-by: Jason Wang Message-Id: <20220617072949.30734-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index db05b5e930be..8a5810bcb839 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3655,14 +3655,20 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); - err = register_netdev(dev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + + err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); + rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); + rtnl_unlock(); + err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); -- cgit v1.2.3 From 11a37eb66812ce6a06b79223ad530eb0e1d7294d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Jun 2022 13:11:14 +0800 Subject: caif_virtio: fix race between virtio_device_ready() and ndo_open() We currently depend on probe() calling virtio_device_ready() - which happens after netdev registration. Since ndo_open() can be called immediately after register_netdev, this means there exists a race between ndo_open() and virtio_device_ready(): the driver may start to use the device (e.g. TX) before DRIVER_OK which violates the spec. Fix this by switching to use register_netdevice() and protect the virtio_device_ready() with rtnl_lock() to make sure ndo_open() can only be called after virtio_device_ready(). Fixes: 0d2e1a2926b18 ("caif_virtio: Introduce caif over virtio") Signed-off-by: Jason Wang Message-Id: <20220620051115.3142-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/caif/caif_virtio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 5458f57177a0..0b0f234b0b50 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -722,13 +722,21 @@ static int cfv_probe(struct virtio_device *vdev) /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + /* register Netdev */ - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { + rtnl_unlock(); dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); goto err; } + virtio_device_ready(vdev); + + rtnl_unlock(); + debugfs_init(cfv); return 0; -- cgit v1.2.3 From ed7ac37fde33ccd84e4bd2b9363c191f925364c7 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:20 +0200 Subject: virtio_mmio: Add missing PM calls to freeze/restore Most virtio drivers provide freeze/restore callbacks to finish up device usage before suspend and to reinitialize the virtio device after resume. However, these callbacks are currently only called when using virtio_pci. virtio_mmio does not have any PM ops defined. This causes problems for example after suspend to disk (hibernation), since the virtio devices might lose their state after the VMM is restarted. Calling virtio_device_freeze()/restore() ensures that the virtio devices are re-initialized correctly. Fix this by implementing the dev_pm_ops for virtio_mmio, similar to virtio_pci_common. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-2-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c9bec3813e94..980dffd69586 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -556,6 +557,25 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .synchronize_cbs = vm_synchronize_cbs, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -799,6 +819,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; -- cgit v1.2.3 From e0c2ce8217955537dd5434baeba061f209797119 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:21 +0200 Subject: virtio_mmio: Restore guest page size on resume Virtio devices might lose their state when the VMM is restarted after a suspend to disk (hibernation) cycle. This means that the guest page size register must be restored for the virtio_mmio legacy interface, since otherwise the virtio queues are not functional. This is particularly problematic for QEMU that currently still defaults to using the legacy interface for virtio_mmio. Write the guest page size register again in virtio_mmio_restore() to make legacy virtio_mmio devices work correctly after hibernation. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-3-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 980dffd69586..083ff1eb743d 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -569,6 +569,9 @@ static int virtio_mmio_restore(struct device *dev) { struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + return virtio_device_restore(&vm_dev->vdev); } -- cgit v1.2.3 From 037d4305569aacaa018a617771dccbb81cc60257 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 22 Jun 2022 17:14:07 +0200 Subject: vhost-vdpa: call vhost_vdpa_cleanup during the release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") we call vhost_vdpa_iotlb_free() during the release to clean all regions mapped in the iotlb. That commit removed vhost_vdpa_iotlb_free() and added vhost_vdpa_cleanup() to do some cleanup, including deleting all mappings, but we forgot to call it in vhost_vdpa_release(). This causes that if an application does not remove all mappings explicitly (or it crashes), the mappings remain in the iotlb and subsequent applications may fail if they map the same addresses. Calling vhost_vdpa_cleanup() also fixes a memory leak since we are not freeing `v->vdev.vqs` during the release from the same commit. Since vhost_vdpa_cleanup() calls vhost_dev_cleanup() we can remove its call from vhost_vdpa_release(). Fixes: 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") Cc: gautam.dawar@xilinx.com Signed-off-by: Stefano Garzarella Message-Id: <20220622151407.51232-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Eugenio Pérez Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 5ad2596c6e8a..23dcbfdfa13b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1209,7 +1209,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_dev_stop(&v->vdev); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); - vhost_dev_cleanup(&v->vdev); + vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); atomic_dec(&v->opened); -- cgit v1.2.3 From c7cc29aaebf9eaa543b4c70801e0ecef1101b3c8 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 22 Jun 2022 15:23:06 -0400 Subject: virtio_ring: make vring_create_virtqueue_split prettier Add some spaces to vring_alloc_queue(make it look prettier). Signed-off-by: Deming Wang Message-Id: <20220622192306.4371-1-wangdeming@inspur.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 96731cdef422..643ca779fcc6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -935,7 +935,7 @@ static struct virtqueue *vring_create_virtqueue_split( for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (queue) break; if (!may_reduce_num) -- cgit v1.2.3 From a27a1e35f5c87463ba7c12d5b7d7cbafbefc9213 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 23 Jun 2022 13:59:14 +0200 Subject: platform/x86: ideapad-laptop: Add allow_v4_dytc module parameter Add an allow_v4_dytc module parameter to allow users to easily test if DYTC version 4 platform-profiles work on their laptop. Fixes: 599482c58ebd ("platform/x86: ideapad-laptop: Add platform support for Ideapad 5 Pro 16ACH6-82L5") Link: https://bugzilla.kernel.org/show_bug.cgi?id=213297 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220623115914.103001-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 3ccb7b71dfb1..71f4b59eed4b 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -152,6 +152,10 @@ static bool no_bt_rfkill; module_param(no_bt_rfkill, bool, 0444); MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth."); +static bool allow_v4_dytc; +module_param(allow_v4_dytc, bool, 0444); +MODULE_PARM_DESC(allow_v4_dytc, "Enable DYTC version 4 platform-profile support."); + /* * ACPI Helpers */ @@ -901,13 +905,16 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF; - if (dytc_version < 5) { - if (dytc_version < 4 || !dmi_check_system(ideapad_dytc_v4_allow_table)) { - dev_info(&priv->platform_device->dev, - "DYTC_VERSION is less than 4 or is not allowed: %d\n", - dytc_version); - return -ENODEV; - } + if (dytc_version < 4) { + dev_info(&priv->platform_device->dev, "DYTC_VERSION < 4 is not supported\n"); + return -ENODEV; + } + + if (dytc_version < 5 && + !(allow_v4_dytc || dmi_check_system(ideapad_dytc_v4_allow_table))) { + dev_info(&priv->platform_device->dev, + "DYTC_VERSION 4 support may not work. Pass ideapad_laptop.allow_v4_dytc=Y on the kernel commandline to enable\n"); + return -ENODEV; } priv->dytc = kzalloc(sizeof(*priv->dytc), GFP_KERNEL); -- cgit v1.2.3 From 8853e8ce9b576e0a3aad8381e19a117964d445fa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 27 Jun 2022 15:08:50 +0200 Subject: platform/x86: ideapad-laptop: Add Ideapad 5 15ITL05 to ideapad_dytc_v4_allow_table[] The Ideapad 5 15ITL05 uses DYTC version 4 for platform-profile control. This has been tested successfully with the ideapad-laptop DYTC version 5 code; Add the Ideapad 5 15ITL05 to the ideapad_dytc_v4_allow_table[]. Fixes: 599482c58ebd ("platform/x86: ideapad-laptop: Add platform support for Ideapad 5 Pro 16ACH6-82L5") Link: https://bugzilla.kernel.org/show_bug.cgi?id=213297 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220627130850.313537-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 71f4b59eed4b..abd0c81d62c4 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -875,12 +875,18 @@ static void dytc_profile_refresh(struct ideapad_private *priv) static const struct dmi_system_id ideapad_dytc_v4_allow_table[] = { { /* Ideapad 5 Pro 16ACH6 */ - .ident = "LENOVO 82L5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82L5") } }, + { + /* Ideapad 5 15ITL05 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad 5 15ITL05") + } + }, {} }; -- cgit v1.2.3 From 05907f10e235680cc7fb196810e4ad3215d5e648 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jun 2022 14:01:41 +0200 Subject: netfilter: nft_dynset: restore set element counter when failing to update This patch fixes a race condition. nft_rhash_update() might fail for two reasons: - Element already exists in the hashtable. - Another packet won race to insert an entry in the hashtable. In both cases, new() has already bumped the counter via atomic_add_unless(), therefore, decrement the set element counter. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index df40314de21f..76de6c8d9865 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -152,6 +153,7 @@ out: err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } -- cgit v1.2.3 From e34b9ed96ce3b06c79bf884009b16961ca478f87 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 16:43:57 +0200 Subject: netfilter: nf_tables: avoid skb access on nf_stolen When verdict is NF_STOLEN, the skb might have been freed. When tracing is enabled, this can result in a use-after-free: 1. access to skb->nf_trace 2. access to skb->mark 3. computation of trace id 4. dump of packet payload To avoid 1, keep a cached copy of skb->nf_trace in the trace state struct. Refresh this copy whenever verdict is != STOLEN. Avoid 2 by skipping skb->mark access if verdict is STOLEN. 3 is avoided by precomputing the trace id. Only dump the packet when verdict is not "STOLEN". Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++------ net/netfilter/nf_tables_core.c | 24 ++++++++++++++++++--- net/netfilter/nf_tables_trace.c | 44 +++++++++++++++++++++------------------ 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7ad..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 53f40e473855..3ddce24ac76d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, enum nft_trace_types type) { - const struct nft_pktinfo *pkt = info->pkt; - - if (!info->trace || !pkt->skb->nf_trace) + if (!info->trace || !info->nf_trace) return; info->chain = chain; @@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + info->nf_trace = pkt->skb->nf_trace; info->rule = rule; __nft_trace_packet(info, chain, type); } } +static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + if (info->trace) + info->nf_trace = pkt->skb->nf_trace; + } +} + static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) { + const struct nft_pktinfo *pkt = info->pkt; enum nft_trace_types type; switch (regs->verdict.code) { @@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; + case NF_STOLEN: + type = NFT_TRACETYPE_RULE; + /* can't access skb->nf_trace; use copy */ + break; default: type = NFT_TRACETYPE_RULE; + info->nf_trace = pkt->skb->nf_trace; break; } @@ -254,6 +271,7 @@ next_rule: switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; + nft_trace_copy_nftrace(&info); continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423c2..1163ba9c1401 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,22 +25,6 @@ DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); EXPORT_SYMBOL_GPL(nft_trace_enabled); -static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) -{ - __be32 id; - - /* using skb address as ID results in a limited number of - * values (and quick reuse). - * - * So we attempt to use as many skb members that will not - * change while skb is with netfilter. - */ - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), - skb->skb_iif); - - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); -} - static int trace_fill_header(struct sk_buff *nlskb, u16 type, const struct sk_buff *skb, int off, unsigned int len) @@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; + u32 mark = 0; u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) @@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure; - if (trace_fill_id(skb, pkt->skb)) + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) @@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info) case NFT_TRACETYPE_RULE: if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) goto nla_put_failure; + + /* pkt->skb undefined iff NF_STOLEN, disable dump */ + if (info->verdict->code == NF_STOLEN) + info->packet_dumped = true; + else + mark = pkt->skb->mark; + break; case NFT_TRACETYPE_POLICY: + mark = pkt->skb->mark; + if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; } - if (pkt->skb->mark && - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure; if (!info->packet_dumped) { @@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_chain *chain) { + static siphash_key_t trace_key __read_mostly; + struct sk_buff *skb = pkt->skb; + info->basechain = nft_base_chain(chain); info->trace = true; + info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; info->pkt = pkt; info->verdict = verdict; + + net_get_random_once(&trace_key, sizeof(trace_key)); + + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), + skb_get_hash(skb), + skb->skb_iif, + &trace_key); } -- cgit v1.2.3 From c2577862eeb0be94f151f2f1fff662b028061b00 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jun 2022 18:26:03 +0200 Subject: netfilter: br_netfilter: do not skip all hooks with 0 priority When br_netfilter module is loaded, skbs may be diverted to the ipv4/ipv6 hooks, just like as if we were routing. Unfortunately, bridge filter hooks with priority 0 may be skipped in this case. Example: 1. an nftables bridge ruleset is loaded, with a prerouting hook that has priority 0. 2. interface is added to the bridge. 3. no tcp packet is ever seen by the bridge prerouting hook. 4. flush the ruleset 5. load the bridge ruleset again. 6. tcp packets are processed as expected. After 1) the only registered hook is the bridge prerouting hook, but its not called yet because the bridge hasn't been brought up yet. After 2), hook order is: 0 br_nf_pre_routing // br_netfilter internal hook 0 chain bridge f prerouting // nftables bridge ruleset The packet is diverted to br_nf_pre_routing. If call-iptables is off, the nftables bridge ruleset is called as expected. But if its enabled, br_nf_hook_thresh() will skip it because it assumes that all 0-priority hooks had been called previously in bridge context. To avoid this, check for the br_nf_pre_routing hook itself, we need to resume directly after it, even if this hook has a priority of 0. Unfortunately, this still results in different packet flow. With this fix, the eval order after in 3) is: 1. br_nf_pre_routing 2. ip(6)tables (if enabled) 3. nftables bridge but after 5 its the much saner: 1. nftables bridge 2. br_nf_pre_routing 3. ip(6)tables (if enabled) Unfortunately I don't see a solution here: It would be possible to move br_nf_pre_routing to a higher priority so that it will be called later in the pipeline, but this also impacts ebtables evaluation order, and would still result in this very ordering problem for all nftables-bridge hooks with the same priority as the br_nf_pre_routing one. Searching back through the git history I don't think this has ever behaved in any other way, hence, no fixes-tag. Reported-by: Radim Hrazdil Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4fd882686b04..ff4779036649 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); -- cgit v1.2.3 From 2390095113e98fc52fffe35c5206d30d9efe3f78 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Jun 2022 12:22:09 +0900 Subject: tick/nohz: unexport __init-annotated tick_nohz_full_setup() EXPORT_SYMBOL and __init is a bad combination because the .init.text section is freed up after the initialization. Hence, modules cannot use symbols annotated __init. The access to a freed symbol may end up with kernel panic. modpost used to detect it, but it had been broken for a decade. Commit 28438794aba4 ("modpost: fix section mismatch check for exported init/exit sections") fixed it so modpost started to warn it again, then this showed up: MODPOST vmlinux.symvers WARNING: modpost: vmlinux.o(___ksymtab_gpl+tick_nohz_full_setup+0x0): Section mismatch in reference from the variable __ksymtab_tick_nohz_full_setup to the function .init.text:tick_nohz_full_setup() The symbol tick_nohz_full_setup is exported and annotated __init Fix this by removing the __init annotation of tick_nohz_full_setup or drop the export. Drop the export because tick_nohz_full_setup() is only called from the built-in code in kernel/sched/isolation.c. Fixes: ae9e557b5be2 ("time: Export tick start/stop functions for rcutorture") Reported-by: Linus Torvalds Signed-off-by: Masahiro Yamada Tested-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- kernel/time/tick-sched.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 58a11f859ac7..30049580cd62 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -526,7 +526,6 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } -EXPORT_SYMBOL_GPL(tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { -- cgit v1.2.3 From a8fc8cb5692aebb9c6f7afd4265366d25dcd1d01 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Jun 2022 21:21:05 -0700 Subject: net: tun: stop NAPI when detaching queues While looking at a syzbot report I noticed the NAPI only gets disabled before it's deleted. I think that user can detach the queue before destroying the device and the NAPI will never be stopped. Fixes: 943170998b20 ("tun: enable NAPI for TUN/TAP driver") Acked-by: Petar Penkov Link: https://lore.kernel.org/r/20220623042105.2274812-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7fd0288c3789..e2eb35887394 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -273,6 +273,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -653,8 +659,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -808,6 +816,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); -- cgit v1.2.3 From 8ee9d82cd0a45e7d050ade598c9f33032a0f2891 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 26 Jun 2022 21:33:48 -0700 Subject: epic100: fix use after free on rmmod epic_close() calls epic_rx() and uses dma buffer, but in epic_remove_one() we already freed the dma buffer. To fix this issue, reorder function calls like in the .probe function. BUG: KASAN: use-after-free in epic_rx+0xa6/0x7e0 [epic100] Call Trace: epic_rx+0xa6/0x7e0 [epic100] epic_close+0xec/0x2f0 [epic100] unregister_netdev+0x18/0x20 epic_remove_one+0xaa/0xf0 [epic100] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yilun Wu Signed-off-by: Tong Zhang Reviewed-by: Francois Romieu Link: https://lore.kernel.org/r/20220627043351.25615-1-ztong0001@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/epic100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index a0654e88444c..0329caf63279 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1515,14 +1515,14 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); + unregister_netdev(dev); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); - unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); - pci_release_regions(pdev); free_netdev(dev); + pci_release_regions(pdev); pci_disable_device(pdev); /* pci_power_off(pdev, -1); */ } -- cgit v1.2.3 From 76b39b94382f9e0a639e1c70c3253de248cc4c83 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 23 Jun 2022 11:07:41 -0300 Subject: net/sched: act_api: Notify user space if any actions were flushed before error If during an action flush operation one of the actions is still being referenced, the flush operation is aborted and the kernel returns to user space with an error. However, if the kernel was able to flush, for example, 3 actions and failed on the fourth, the kernel will not notify user space that it deleted 3 actions before failing. This patch fixes that behaviour by notifying user space of how many actions were deleted before flush failed and by setting extack with a message describing what happened. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da9733da9868..817065aa2833 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { -- cgit v1.2.3 From 88153e29c1e0f3ace8c831b06f6cea9503f16cec Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 23 Jun 2022 11:07:42 -0300 Subject: selftests: tc-testing: Add testcases to test new flush behaviour Add tdc test cases to verify new flush behaviour is correct, which do the following: - Try to flush only one action which is being referenced by a filter - Try to flush three actions where the last one (index 3) is being referenced by a filter Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/actions/gact.json | 77 ++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index b24494c6f546..c652e8c1157d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -609,5 +609,82 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "7f52", + "name": "Try to flush action which is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + "$TC actions add action pass index 1", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "1", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] + }, + { + "id": "ae1e", + "name": "Try to flush actions when last one is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + [ + "$TC actions add action pass index 1", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 2", + "$TC actions add action drop index 3", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] } ] -- cgit v1.2.3 From 4bbfed9112ca9da88ac83d5ffe62c988f7169e9f Mon Sep 17 00:00:00 2001 From: Shreenidhi Shedi Date: Sun, 26 Jun 2022 18:59:47 +0530 Subject: octeon_ep: use bitwise AND This should be bitwise operator not logical. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Shreenidhi Shedi Link: https://lore.kernel.org/r/20220626132947.3992423-1-sshedi@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index cc51149790ff..3d5d39a52fe6 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -52,7 +52,7 @@ #define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF) #define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) -#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) && 0xFF) +#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0xFF) /* SDP Function select */ #define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8 -- cgit v1.2.3 From 6b9f1d46fdada756294d2d5f04df613478386d34 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 26 Jun 2022 22:00:39 +0200 Subject: MAINTAINERS: nfc: drop Charles Gorand from NXP-NCI Mails to Charles get an auto reply, that he is no longer working at Eff'Innov technologies. Drop the entry and mark the driver as orphaned. Signed-off-by: Michael Walle Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220626200039.4062784-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6cc825857722..6a288d59ff79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14350,9 +14350,8 @@ F: Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml F: sound/soc/codecs/tfa989x.c NXP-NCI NFC DRIVER -R: Charles Gorand L: linux-nfc@lists.01.org (subscribers-only) -S: Supported +S: Orphan F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml F: drivers/nfc/nxp-nci -- cgit v1.2.3 From 805206e66fab4ba1e0ebd19402006d62cd1d4902 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:38 +0200 Subject: net: asix: fix "can't send until first packet is send" issue If cable is attached after probe sequence, the usbnet framework would not automatically start processing RX packets except at least one packet was transmitted. On systems with any kind of address auto configuration this issue was not detected, because some packets are send immediately after link state is changed to "running". With this patch we will notify usbnet about link status change provided by the PHYlib. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 632fa6c1d5e3..b4a1b7abcfc9 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -431,6 +431,7 @@ void asix_adjust_link(struct net_device *netdev) asix_write_medium_mode(dev, mode, 0); phy_print_status(phydev); + usbnet_link_change(dev, phydev->link, 0); } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) -- cgit v1.2.3 From ce95ab775f8d8e89a038c0e5611a7381a2ef8e43 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Jun 2022 09:51:39 +0200 Subject: net: usb: asix: do not force pause frames support We should respect link partner capabilities and not force flow control support on every link. Even more, in current state the MAC driver do not advertises pause support so we should not keep flow control enabled at all. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Signed-off-by: Oleksij Rempel Tested-by: Anton Lundin Link: https://lore.kernel.org/r/20220624075139.3139300-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2c81236c6c7c..45d3cc5cc355 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -126,8 +126,7 @@ AX_MEDIUM_RE) #define AX88772_MEDIUM_DEFAULT \ - (AX_MEDIUM_FD | AX_MEDIUM_RFC | \ - AX_MEDIUM_TFC | AX_MEDIUM_PS | \ + (AX_MEDIUM_FD | AX_MEDIUM_PS | \ AX_MEDIUM_AC | AX_MEDIUM_RE) /* AX88772 & AX88178 RX_CTL values */ -- cgit v1.2.3 From 3b0dc529f56b5f2328244130683210be98f16f7f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Jun 2022 14:00:15 +0200 Subject: ipv6: take care of disable_policy when restoring routes When routes corresponding to addresses are restored by fixup_permanent_addr(), the dst_nopolicy parameter was not set. The typical use case is a user that configures an address on a down interface and then put this interface up. Let's take care of this flag in addrconf_f6i_alloc(), so that every callers benefit ont it. CC: stable@kernel.org CC: David Forster Fixes: df789fe75206 ("ipv6: Provide ipv6 version of "disable_policy" sysctl") Reported-by: Siwar Zitouni Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220623120015.32640-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 4 ---- net/ipv6/route.c | 9 ++++++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..5864cbc30db6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1109,10 +1109,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..828355710c57 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4569,8 +4569,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } -- cgit v1.2.3 From 8698e3bab4dd7968666e84e111d0bfd17c040e77 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2022 20:47:19 +0300 Subject: fanotify: refine the validation checks on non-dir inode mask Commit ceaf69f8eadc ("fanotify: do not allow setting dirent events in mask of non-dir") added restrictions about setting dirent events in the mask of a non-dir inode mark, which does not make any sense. For backward compatibility, these restictions were added only to new (v5.17+) APIs. It also does not make any sense to set the flags FAN_EVENT_ON_CHILD or FAN_ONDIR in the mask of a non-dir inode. Add these flags to the dir-only restriction of the new APIs as well. Move the check of the dir-only flags for new APIs into the helper fanotify_events_supported(), which is only called for FAN_MARK_ADD, because there is no need to error on an attempt to remove the dir-only flags from non-dir inode. Fixes: ceaf69f8eadc ("fanotify: do not allow setting dirent events in mask of non-dir") Link: https://lore.kernel.org/linux-fsdevel/20220627113224.kr2725conevh53u4@quack3.lan/ Link: https://lore.kernel.org/r/20220627174719.2838175-1-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 34 +++++++++++++++++++--------------- include/linux/fanotify.h | 4 ++++ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c2255b440df9..b08ce0d821a7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1513,8 +1513,15 @@ static int fanotify_test_fid(struct dentry *dentry) return 0; } -static int fanotify_events_supported(struct path *path, __u64 mask) +static int fanotify_events_supported(struct fsnotify_group *group, + struct path *path, __u64 mask, + unsigned int flags) { + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ + bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || + (mask & FAN_RENAME); + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1526,6 +1533,16 @@ static int fanotify_events_supported(struct path *path, __u64 mask) if (mask & FANOTIFY_PERM_EVENTS && path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) return -EINVAL; + + /* + * We shouldn't have allowed setting dirent events and the directory + * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, + * but because we always allowed it, error only when using new APIs. + */ + if (strict_dir_events && mark_type == FAN_MARK_INODE && + !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + return -ENOTDIR; + return 0; } @@ -1672,7 +1689,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (flags & FAN_MARK_ADD) { - ret = fanotify_events_supported(&path, mask); + ret = fanotify_events_supported(group, &path, mask, flags); if (ret) goto path_put_and_out; } @@ -1695,19 +1712,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; - /* - * FAN_RENAME is not allowed on non-dir (for now). - * We shouldn't have allowed setting any dirent events in mask of - * non-dir, but because we always allowed it, error only if group - * was initialized with the new flag FAN_REPORT_TARGET_FID. - */ - ret = -ENOTDIR; - if (inode && !S_ISDIR(inode->i_mode) && - ((mask & FAN_RENAME) || - ((mask & FANOTIFY_DIRENT_EVENTS) && - FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) - goto path_put_and_out; - /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index edc28555814c..e517dbcf74ed 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -111,6 +111,10 @@ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW | FAN_ONDIR) +/* Events and flags relevant only for directories */ +#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \ + FAN_EVENT_ON_CHILD | FAN_ONDIR) + #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) -- cgit v1.2.3 From ab84db251c04d38b8dc7ee86e13d4050bedb1c88 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2022 10:28:13 +0000 Subject: net: bonding: fix possible NULL deref in rlb code syzbot has two reports involving the same root cause. bond_alb_initialize() must not set bond->alb_info.rlb_enabled if a memory allocation error is detected. Report 1: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 0 PID: 12276 Comm: kworker/u4:10 Not tainted 5.19.0-rc3-syzkaller-00132-g3b89b511ea0c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:rlb_clear_slave+0x10e/0x690 drivers/net/bonding/bond_alb.c:393 Code: 8e fc 83 fb ff 0f 84 74 02 00 00 e8 cc 2a 8e fc 48 8b 44 24 08 89 dd 48 c1 e5 06 4c 8d 34 28 49 8d 7e 14 48 89 f8 48 c1 e8 03 <42> 0f b6 14 20 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RSP: 0018:ffffc90018a8f678 EFLAGS: 00010203 RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88803375bb00 RSI: ffffffff84ec4ac4 RDI: 0000000000000014 RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffff8880ac889000 R14: 0000000000000000 R15: ffff88815a668c80 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005597077e10b0 CR3: 0000000026668000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bond_alb_deinit_slave+0x43c/0x6b0 drivers/net/bonding/bond_alb.c:1663 __bond_release_one.cold+0x383/0xd53 drivers/net/bonding/bond_main.c:2370 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3778 [inline] bond_netdev_event+0x993/0xad0 drivers/net/bonding/bond_main.c:3889 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] unregister_netdevice_many+0x948/0x18b0 net/core/dev.c:10839 default_device_exit_batch+0x449/0x590 net/core/dev.c:11333 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Report 2: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 5206 Comm: syz-executor.1 Not tainted 5.18.0-syzkaller-12108-g58f9d52ff689 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:rlb_req_update_slave_clients+0x109/0x2f0 drivers/net/bonding/bond_alb.c:502 Code: 5d 18 8f fc 41 80 3e 00 0f 85 a5 01 00 00 89 d8 48 c1 e0 06 49 03 84 24 68 01 00 00 48 8d 78 30 49 89 c7 48 89 fa 48 c1 ea 03 <80> 3c 2a 00 0f 85 98 01 00 00 4d 39 6f 30 75 83 e8 22 18 8f fc 49 RSP: 0018:ffffc9000300ee80 EFLAGS: 00010206 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90016c11000 RDX: 0000000000000006 RSI: ffffffff84eb6bf3 RDI: 0000000000000030 RBP: dffffc0000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff888027c80c80 R13: ffff88807d7ff800 R14: ffffed1004f901bd R15: 0000000000000000 FS: 00007f6f46c58700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 00000000516cc000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: alb_fasten_mac_swap+0x886/0xa80 drivers/net/bonding/bond_alb.c:1070 bond_alb_handle_active_change+0x624/0x1050 drivers/net/bonding/bond_alb.c:1765 bond_change_active_slave+0xfa1/0x29b0 drivers/net/bonding/bond_main.c:1173 bond_select_active_slave+0x23f/0xa50 drivers/net/bonding/bond_main.c:1253 bond_enslave+0x3b34/0x53b0 drivers/net/bonding/bond_main.c:2159 do_set_master+0x1c8/0x220 net/core/rtnetlink.c:2577 rtnl_newlink_create net/core/rtnetlink.c:3380 [inline] __rtnl_newlink+0x13ac/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xc90 net/core/rtnetlink.c:6089 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f6f45a89109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6f46c58168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f6f45b9c030 RCX: 00007f6f45a89109 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000006 RBP: 00007f6f45ae308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffed99029af R14: 00007f6f46c58300 R15: 0000000000022000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20220627102813.126264-1-edumazet@google.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 303c8d32d451..007d43e46dcb 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1302,12 +1302,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) return res; if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } -- cgit v1.2.3 From 0fe3dbbefb74a8575f61d7801b08dbc50523d60d Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 27 Jun 2022 22:00:04 +0800 Subject: linux/dim: Fix divide by 0 in RDMA DIM Fix a divide 0 error in rdma_dim_stats_compare() when prev->cpe_ratio == 0. CallTrace: Hardware name: H3C R4900 G3/RS33M2C9S, BIOS 2.00.37P21 03/12/2020 task: ffff880194b78000 task.stack: ffffc90006714000 RIP: 0010:backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: 0018:ffff880c10e83ec0 EFLAGS: 00010202 RAX: 0000000000002710 RBX: ffff88096cd7f780 RCX: 0000000000000064 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000001 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 000000001d7c6c09 R13: ffff88096cd7f780 R14: ffff880b174fe800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880c10e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000a0965b00 CR3: 000000000200a003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ib_poll_handler+0x43/0x80 [ib_core] irq_poll_softirq+0xae/0x110 __do_softirq+0xd1/0x28c irq_exit+0xde/0xf0 do_IRQ+0x54/0xe0 common_interrupt+0x8f/0x8f ? cpuidle_enter_state+0xd9/0x2a0 ? cpuidle_enter_state+0xc7/0x2a0 ? do_idle+0x170/0x1d0 ? cpu_startup_entry+0x6f/0x80 ? start_secondary+0x1b9/0x210 ? secondary_startup_64+0xa5/0xb0 Code: 0f 87 e1 00 00 00 8b 4c 24 14 44 8b 43 14 89 c8 4d 63 c8 44 29 c0 99 31 d0 29 d0 31 d2 48 98 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <49> f7 f1 48 83 f8 0a 0f 86 c1 00 00 00 44 39 c1 7f 10 48 89 df RIP: backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: ffff880c10e83ec0 Fixes: f4915455dcf0 ("linux/dim: Implement RDMA adaptive moderation (DIM)") Link: https://lore.kernel.org/r/20220627140004.3099-1-thomas.liu@ucloud.cn Signed-off-by: Tao Liu Reviewed-by: Max Gurtovoy Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/dim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dim.h b/include/linux/dim.h index b698266d0035..6c5733981563 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -21,7 +21,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /* * Calculate the gap between two values. -- cgit v1.2.3 From 3a0cf7ab8df3878a7e2f3d29275b785cf4e7afb6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:34 +0200 Subject: ACPI: video: Change how we determine if brightness key-presses are handled Some systems have an ACPI video bus but not ACPI video devices with backlight capability. On these devices brightness key-presses are (logically) not reported through the ACPI video bus. Change how acpi_video_handles_brightness_key_presses() determines if brightness key-presses are handled by the ACPI video driver to avoid vendor specific drivers/platform/x86 drivers filtering out their brightness key-presses even though they are the only ones reporting these presses. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Acked-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-2-hdegoede@redhat.com --- drivers/acpi/acpi_video.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index e07782b1fbb6..43177c20ce4f 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,6 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); +static bool has_backlight; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1222,6 +1223,9 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_bind(video, data); acpi_video_device_find_cap(data); + if (data->cap._BCM && data->cap._BCL) + has_backlight = true; + mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); mutex_unlock(&video->device_list_lock); @@ -2249,6 +2253,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; + has_backlight = false; } mutex_unlock(®ister_count_mutex); } @@ -2270,13 +2275,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - bool have_video_busses; - - mutex_lock(&video_list_lock); - have_video_busses = !list_empty(&video_bus_head); - mutex_unlock(&video_list_lock); - - return have_video_busses && + return has_backlight && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); -- cgit v1.2.3 From 65a3e6c8d3f7c346813a05f3d76fc46b640d76d6 Mon Sep 17 00:00:00 2001 From: Stefan Seyfried Date: Fri, 24 Jun 2022 13:23:35 +0200 Subject: platform/x86: panasonic-laptop: de-obfuscate button codes In the definition of panasonic_keymap[] the key codes are given in decimal, later checks are done with hexadecimal values, which does not help in understanding the code. Additionally use two helper variables to shorten the code and make the logic more obvious. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Signed-off-by: Stefan Seyfried Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-3-hdegoede@redhat.com --- drivers/platform/x86/panasonic-laptop.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 37850d07987d..ca6137f4000f 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -762,6 +762,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) struct input_dev *hotk_input_dev = pcc->input_dev; int rc; unsigned long long result; + unsigned int key; + unsigned int updown; rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY, NULL, &result); @@ -770,18 +772,22 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) return; } + key = result & 0xf; + updown = result & 0x80; /* 0x80 == key down; 0x00 = key up */ + /* hack: some firmware sends no key down for sleep / hibernate */ - if ((result & 0xf) == 0x7 || (result & 0xf) == 0xa) { - if (result & 0x80) + if (key == 7 || key == 10) { + if (updown) sleep_keydown_seen = 1; if (!sleep_keydown_seen) sparse_keymap_report_event(hotk_input_dev, - result & 0xf, 0x80, false); + key, 0x80, false); } - if ((result & 0xf) == 0x7 || (result & 0xf) == 0x9 || (result & 0xf) == 0xa) { + /* for the magic values, see panasonic_keymap[] above */ + if (key == 7 || key == 9 || key == 10) { if (!sparse_keymap_report_event(hotk_input_dev, - result & 0xf, result & 0x80, false)) + key, updown, false)) pr_err("Unknown hotkey event: 0x%04llx\n", result); } } -- cgit v1.2.3 From fe4326c8d18dc8a54affdc9ab269ad92dafef659 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:36 +0200 Subject: platform/x86: panasonic-laptop: sort includes alphabetically Sort includes alphabetically, small cleanup patch in preparation of further changes. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-4-hdegoede@redhat.com --- drivers/platform/x86/panasonic-laptop.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index ca6137f4000f..26e31ac09dc6 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -119,20 +119,19 @@ * - v0.1 start from toshiba_acpi driver written by John Belmonte */ -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include #include #include +#include +#include #include - +#include +#include +#include +#include MODULE_AUTHOR("Hiroshi Miura "); MODULE_AUTHOR("David Bronaugh "); -- cgit v1.2.3 From 83a5ddc3dc561c40d948b85553514aaba99123d8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:37 +0200 Subject: platform/x86: panasonic-laptop: revert "Resolve hotkey double trigger bug" In hindsight blindly throwing away most of the key-press events is not a good idea. So revert commit ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug"). Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-5-hdegoede@redhat.com --- drivers/platform/x86/panasonic-laptop.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 26e31ac09dc6..2e6531dd15f9 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -783,12 +783,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) key, 0x80, false); } - /* for the magic values, see panasonic_keymap[] above */ - if (key == 7 || key == 9 || key == 10) { - if (!sparse_keymap_report_event(hotk_input_dev, - key, updown, false)) - pr_err("Unknown hotkey event: 0x%04llx\n", result); - } + if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false)) + pr_err("Unknown hotkey event: 0x%04llx\n", result); } static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event) -- cgit v1.2.3 From 1f2c9de83a50447a2d7166f6273ab0c0e97cd68e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:38 +0200 Subject: platform/x86: panasonic-laptop: don't report duplicate brightness key-presses The brightness key-presses might also get reported by the ACPI video bus, check for this and in this case don't report the presses to avoid reporting 2 presses for a single key-press. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-6-hdegoede@redhat.com --- drivers/platform/x86/Kconfig | 1 + drivers/platform/x86/panasonic-laptop.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index f08ad85683cb..b59063f83104 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -945,6 +945,7 @@ config PANASONIC_LAPTOP tristate "Panasonic Laptop Extras" depends on INPUT && ACPI depends on BACKLIGHT_CLASS_DEVICE + depends on ACPI_VIDEO=n || ACPI_VIDEO select INPUT_SPARSEKMAP help This driver adds support for access to backlight control and hotkeys diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 2e6531dd15f9..d65e6c2372ca 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -132,6 +132,7 @@ #include #include #include +#include MODULE_AUTHOR("Hiroshi Miura "); MODULE_AUTHOR("David Bronaugh "); @@ -783,6 +784,13 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) key, 0x80, false); } + /* + * Don't report brightness key-presses if they are also reported + * by the ACPI video bus. + */ + if ((key == 1 || key == 2) && acpi_video_handles_brightness_key_presses()) + return; + if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false)) pr_err("Unknown hotkey event: 0x%04llx\n", result); } -- cgit v1.2.3 From aacb455dfe01b7a24a792a2fbe7a04112ce8321d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Jun 2022 13:23:39 +0200 Subject: platform/x86: panasonic-laptop: filter out duplicate volume up/down/mute keypresses On some Panasonic models the volume up/down/mute keypresses get reported both through the Panasonic ACPI HKEY interface as well as through the atkbd device. Filter out the atkbd scan-codes for these to avoid reporting presses twice. Note normally we would leave the filtering of these to userspace by mapping the scan-codes to KEY_UNKNOWN through /lib/udev/hwdb.d/60-keyboard.hwdb. However in this case that would cause regressions since we were filtering the Panasonic ACPI HKEY events before, so filter these in the kernel. Fixes: ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug") Reported-and-tested-by: Stefan Seyfried Reported-and-tested-by: Kenneth Chan Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220624112340.10130-7-hdegoede@redhat.com --- drivers/platform/x86/Kconfig | 1 + drivers/platform/x86/panasonic-laptop.c | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index b59063f83104..bc4013e950ed 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -946,6 +946,7 @@ config PANASONIC_LAPTOP depends on INPUT && ACPI depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO=n || ACPI_VIDEO + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP help This driver adds support for access to backlight control and hotkeys diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index d65e6c2372ca..615e39cbbbf1 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ #include #include #include +#include #include #include #include @@ -241,6 +243,42 @@ struct pcc_acpi { struct platform_device *platform; }; +/* + * On some Panasonic models the volume up / down / mute keys send duplicate + * keypress events over the PS/2 kbd interface, filter these out. + */ +static bool panasonic_i8042_filter(unsigned char data, unsigned char str, + struct serio *port) +{ + static bool extended; + + if (str & I8042_STR_AUXDATA) + return false; + + if (data == 0xe0) { + extended = true; + return true; + } else if (extended) { + extended = false; + + switch (data & 0x7f) { + case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */ + case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */ + case 0x30: /* e0 30 / e0 b0, Volume Up press / release */ + return true; + default: + /* + * Report the previously filtered e0 before continuing + * with the next non-filtered byte. + */ + serio_interrupt(port, 0xe0, 0); + return false; + } + } + + return false; +} + /* method access functions */ static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val) { @@ -1006,6 +1044,7 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) pcc->platform = NULL; } + i8042_install_filter(panasonic_i8042_filter); return 0; out_platform: @@ -1029,6 +1068,8 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device) if (!device || !pcc) return -EINVAL; + i8042_remove_filter(panasonic_i8042_filter); + if (pcc->platform) { device_remove_file(&pcc->platform->dev, &dev_attr_cdpower); platform_device_unregister(pcc->platform); -- cgit v1.2.3 From 42504af775361ca2330a2bfde496a5ebc5655c86 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Fri, 3 Jun 2022 13:02:09 -0400 Subject: platform/x86: thinkpad-acpi: profile capabilities as integer Currently the active mode (PSC/MMC) is stored in an enum and queried throughout the driver. Other driver changes will enumerate additional submodes that are relevant to be tracked, so instead track PSC/MMC in a single integer variable. Co-developed-by: Mario Limonciello Signed-off-by: Mario Limonciello Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220603170212.164963-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 45 +++++++++++++++--------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index aa6ffeaa3932..b4d6a356b746 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10300,21 +10300,15 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_DISABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 0) #define DYTC_ENABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 1) -enum dytc_profile_funcmode { - DYTC_FUNCMODE_NONE = 0, - DYTC_FUNCMODE_MMC, - DYTC_FUNCMODE_PSC, -}; - -static enum dytc_profile_funcmode dytc_profile_available; static enum platform_profile_option dytc_current_profile; static atomic_t dytc_ignore_event = ATOMIC_INIT(0); static DEFINE_MUTEX(dytc_mutex); +static int dytc_capabilities; static bool dytc_mmc_get_available; static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) { - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10331,7 +10325,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p } return 0; } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10353,21 +10347,21 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe { switch (profile) { case PLATFORM_PROFILE_LOW_POWER: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_LOWPOWER; break; case PLATFORM_PROFILE_BALANCED: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_BALANCE; break; case PLATFORM_PROFILE_PERFORMANCE: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_PERFORM; break; default: /* Unknown profile */ @@ -10446,7 +10440,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, if (err) goto unlock; - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (profile == PLATFORM_PROFILE_BALANCED) { /* * To get back to balanced mode we need to issue a reset command. @@ -10465,7 +10459,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, goto unlock; } } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; @@ -10484,12 +10478,12 @@ static void dytc_profile_refresh(void) int perfmode; mutex_lock(&dytc_mutex); - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (dytc_mmc_get_available) err = dytc_command(DYTC_CMD_MMC_GET, &output); else err = dytc_cql_command(DYTC_CMD_GET, &output); - } else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) err = dytc_command(DYTC_CMD_GET, &output); mutex_unlock(&dytc_mutex); @@ -10518,7 +10512,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) set_bit(PLATFORM_PROFILE_BALANCED, dytc_profile.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, dytc_profile.choices); - dytc_profile_available = DYTC_FUNCMODE_NONE; err = dytc_command(DYTC_CMD_QUERY, &output); if (err) return err; @@ -10531,13 +10524,12 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return -ENODEV; /* Check what capabilities are supported */ - err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + err = dytc_command(DYTC_CMD_FUNC_CAP, &dytc_capabilities); if (err) return err; - if (output & BIT(DYTC_FC_MMC)) { /* MMC MODE */ - dytc_profile_available = DYTC_FUNCMODE_MMC; - + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { /* MMC MODE */ + pr_debug("MMC is supported\n"); /* * Check if MMC_GET functionality available * Version > 6 and return success from MMC_GET command @@ -10548,8 +10540,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (!err && ((output & DYTC_ERR_MASK) == DYTC_ERR_SUCCESS)) dytc_mmc_get_available = true; } - } else if (output & BIT(DYTC_FC_PSC)) { /* PSC MODE */ - dytc_profile_available = DYTC_FUNCMODE_PSC; + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; @@ -10575,7 +10567,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) static void dytc_profile_exit(void) { - dytc_profile_available = DYTC_FUNCMODE_NONE; platform_profile_remove(); } -- cgit v1.2.3 From bce6243f767f7da88aa4674d5d678f9f156eaba9 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 27 Jun 2022 14:14:49 -0400 Subject: platform/x86: thinkpad_acpi: do not use PSC mode on Intel platforms PSC platform profile mode is only supported on Linux for AMD platforms. Some older Intel platforms (e.g T490) are advertising it's capability as Windows uses it - but on Linux we should only be using MMC profile for Intel systems. Add a check to prevent it being enabled incorrectly. Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220627181449.3537-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b4d6a356b746..a8b383051528 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10541,6 +10541,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) dytc_mmc_get_available = true; } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + /* Support for this only works on AMD platforms */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + dbg_printk(TPACPI_DBG_INIT, "PSC not support on Intel platforms\n"); + return -ENODEV; + } pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); -- cgit v1.2.3 From 9ab762a84b8094540c18a170e5ddd6488632c456 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 28 Jun 2022 20:37:26 +0800 Subject: platform/x86: hp-wmi: Ignore Sanitization Mode event After system resume the hp-wmi driver may complain: [ 702.620180] hp_wmi: Unknown event_id - 23 - 0x0 According to HP it means 'Sanitization Mode' and it's harmless to just ignore the event. Cc: Jorge Lopez Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220628123726.250062-1-kai.heng.feng@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0d8cb22e30df..bc7020e9df9e 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -89,6 +89,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; /* @@ -853,6 +854,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; -- cgit v1.2.3 From 0c1f78a49af721490a5ad70b73e8b4d382465dae Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:35 -0700 Subject: mptcp: fix error mibs accounting The current accounting for MP_FAIL and FASTCLOSE is not very accurate: both can be increased even when the related option is not really sent. Move the accounting into the correct place. Fixes: eb7f33654dc1 ("mptcp: add the mibs for MP_FAIL") Fixes: 1e75629cb964 ("mptcp: add the mibs for MP_FASTCLOSE") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 5 +++-- net/mptcp/pm.c | 1 - net/mptcp/subflow.c | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index be3b918a6d15..2a6351d55a7d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -765,6 +765,7 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); return true; } @@ -788,6 +789,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, opts->rcvr_key = msk->remote_key; pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -809,6 +811,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->fail_seq = subflow->map_seq; pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; } @@ -833,13 +836,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); } /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); } return true; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 59a85220edc9..91a62b598de4 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -310,7 +310,6 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("send MP_FAIL response and infinite map"); subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); subflow->send_infinite_map = 1; } else if (!sock_flag(sk, SOCK_DEAD)) { pr_debug("MP_FAIL response received"); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8841e8cd9ad8..50ad19adc003 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -958,10 +958,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - if (subflow->mp_join || subflow->valid_csum_seen) { + if (subflow->mp_join || subflow->valid_csum_seen) subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); - } return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; } -- cgit v1.2.3 From 31bf11de146c3f8892093ff39f8f9b3069d6a852 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:36 -0700 Subject: mptcp: introduce MAPPING_BAD_CSUM This allow moving a couple of conditional out of the fast path, making the code more easy to follow and will simplify the next patch. Fixes: ae66fb2ba6c3 ("mptcp: Do TCP fallback on early DSS checksum failure") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50ad19adc003..42a7c18a1c24 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -843,7 +843,8 @@ enum mapping_status { MAPPING_INVALID, MAPPING_EMPTY, MAPPING_DATA_FIN, - MAPPING_DUMMY + MAPPING_DUMMY, + MAPPING_BAD_CSUM }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) @@ -958,9 +959,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - if (subflow->mp_join || subflow->valid_csum_seen) - subflow->send_mp_fail = 1; - return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + return MAPPING_BAD_CSUM; } subflow->valid_csum_seen = 1; @@ -1182,10 +1181,8 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (unlikely(status == MAPPING_INVALID)) - goto fallback; - - if (unlikely(status == MAPPING_DUMMY)) + if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || + status == MAPPING_BAD_CSUM)) goto fallback; if (status != MAPPING_OK) @@ -1227,7 +1224,10 @@ no_data: fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. */ - if (subflow->send_mp_fail) { + if (status == MAPPING_BAD_CSUM && + (subflow->mp_join || subflow->valid_csum_seen)) { + subflow->send_mp_fail = 1; + if (!READ_ONCE(msk->allow_infinite_fallback)) { ssk->sk_err = EBADMSG; tcp_set_state(ssk, TCP_CLOSE); -- cgit v1.2.3 From 76a13b315709b5b65a7b65caf9ede9a8a38d8930 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 27 Jun 2022 18:02:37 -0700 Subject: mptcp: invoke MP_FAIL response when needed mptcp_mp_fail_no_response shouldn't be invoked on each worker run, it should be invoked only when MP_FAIL response timeout occurs. This patch refactors the MP_FAIL response logic. It leverages the fact that only the MPC/first subflow can gracefully fail to avoid unneeded subflows traversal: the failing subflow can be only msk->first. A new 'fail_tout' field is added to the subflow context to record the MP_FAIL response timeout and use such field to reliably share the timeout timer between the MP_FAIL event and the MPTCP socket close timeout. Finally, a new ack is generated to send out MP_FAIL notification as soon as we hit the relevant condition, instead of waiting a possibly unbound time for the next data packet. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/281 Fixes: d9fb797046c5 ("mptcp: Do not traverse the subflow connection list without lock") Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 9 +++--- net/mptcp/protocol.c | 77 ++++++++++++++++++++++++++++++++-------------------- net/mptcp/protocol.h | 3 +- net/mptcp/subflow.c | 38 ++++++++++++++++++++------ 4 files changed, 82 insertions(+), 45 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 91a62b598de4..45e2a48397b9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -299,22 +299,21 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct sock *s = (struct sock *)msk; pr_debug("fail_seq=%llu", fail_seq); if (!READ_ONCE(msk->allow_infinite_fallback)) return; - if (!READ_ONCE(subflow->mp_fail_response_expect)) { + if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map"); subflow->send_mp_fail = 1; subflow->send_infinite_map = 1; - } else if (!sock_flag(sk, SOCK_DEAD)) { + tcp_send_ack(sk); + } else { pr_debug("MP_FAIL response received"); - - sk_stop_timer(s, &s->sk_timer); + WRITE_ONCE(subflow->fail_tout, 0); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 17e13396024a..e6fcb61443dd 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -500,7 +500,7 @@ static void mptcp_set_timeout(struct sock *sk) __mptcp_set_timeout(sk, tout); } -static bool tcp_can_send_ack(const struct sock *ssk) +static inline bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); @@ -2175,21 +2175,6 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static struct mptcp_subflow_context * -mp_fail_response_expect_subflow(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow, *ret = NULL; - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->mp_fail_response_expect)) { - ret = subflow; - break; - } - } - - return ret; -} - static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2518,27 +2503,50 @@ reset_timer: mptcp_reset_timer(sk); } +/* schedule the timeout timer for the relevant event: either close timeout + * or mp_fail timeout. The close timeout takes precedence on the mp_fail one + */ +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +{ + struct sock *sk = (struct sock *)msk; + unsigned long timeout, close_timeout; + + if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + return; + + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + + /* the close timeout takes precedence on the fail one, and here at least one of + * them is active + */ + timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + + sk_reset_timer(sk, &sk->sk_timer, timeout); +} + static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow; - struct sock *ssk; + struct sock *ssk = msk->first; bool slow; - subflow = mp_fail_response_expect_subflow(msk); - if (subflow) { - pr_debug("MP_FAIL doesn't respond, reset the subflow"); + if (!ssk) + return; - ssk = mptcp_subflow_tcp_sock(subflow); - slow = lock_sock_fast(ssk); - mptcp_subflow_reset(ssk); - unlock_sock_fast(ssk, slow); - } + pr_debug("MP_FAIL doesn't respond, reset the subflow"); + + slow = lock_sock_fast(ssk); + mptcp_subflow_reset(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); + unlock_sock_fast(ssk, slow); + + mptcp_reset_timeout(msk, 0); } static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *sk = &msk->sk.icsk_inet.sk; + unsigned long fail_tout; int state; lock_sock(sk); @@ -2575,7 +2583,9 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - mptcp_mp_fail_no_response(msk); + fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0; + if (fail_tout && time_after(jiffies, fail_tout)) + mptcp_mp_fail_no_response(msk); unlock: release_sock(sk); @@ -2822,6 +2832,7 @@ static void __mptcp_destroy_sock(struct sock *sk) static void mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; lock_sock(sk); @@ -2840,10 +2851,16 @@ static void mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); + /* since the close timeout takes precedence on the fail one, + * cancel the latter + */ + if (ssk == msk->first) + subflow->fail_tout = 0; + sock_orphan(ssk); unlock_sock_fast(ssk, slow); } @@ -2852,13 +2869,13 @@ cleanup: sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); if (mptcp_sk(sk)->token) - mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); + mptcp_reset_timeout(msk, 0); } release_sock(sk); if (do_cancel_work) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 200f89f6d62f..1d2d71711872 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -468,7 +468,6 @@ struct mptcp_subflow_context { local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; - bool mp_fail_response_expect; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -482,6 +481,7 @@ struct mptcp_subflow_context { u8 stale_count; long delegated_status; + unsigned long fail_tout; ); @@ -662,6 +662,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 42a7c18a1c24..8dfea6a8a82f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -971,7 +971,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool csum_reqd = READ_ONCE(msk->csum_enabled); - struct sock *sk = (struct sock *)msk; struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -1013,9 +1012,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; - if (!sock_flag(ssk, SOCK_DEAD)) - sk_stop_timer(sk, &sk->sk_timer); - return MAPPING_INVALID; } @@ -1162,6 +1158,33 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) return !subflow->fully_established; } +static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + unsigned long fail_tout; + + /* greceful failure can happen only on the MPC subflow */ + if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) + return; + + /* since the close timeout take precedence on the fail one, + * no need to start the latter when the first is already set + */ + if (sock_flag((struct sock *)msk, SOCK_DEAD)) + return; + + /* we don't need extreme accuracy here, use a zero fail_tout as special + * value meaning no fail timeout at all; + */ + fail_tout = jiffies + TCP_RTO_MAX; + if (!fail_tout) + fail_tout = 1; + WRITE_ONCE(subflow->fail_tout, fail_tout); + tcp_send_ack(ssk); + + mptcp_reset_timeout(msk, subflow->fail_tout); +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1236,11 +1259,8 @@ fallback: tcp_send_active_reset(ssk, GFP_ATOMIC); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - } else if (!sock_flag(ssk, SOCK_DEAD)) { - WRITE_ONCE(subflow->mp_fail_response_expect, true); - sk_reset_timer((struct sock *)msk, - &((struct sock *)msk)->sk_timer, - jiffies + TCP_RTO_MAX); + } else { + mptcp_subflow_fail(msk, ssk); } WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); return true; -- cgit v1.2.3 From d51991e2e31477853e5b9c1005ac617707077286 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:38 -0700 Subject: mptcp: fix shutdown vs fallback race If the MPTCP socket shutdown happens before a fallback to TCP, and all the pending data have been already spooled, we never close the TCP connection. Address the issue explicitly checking for critical condition at fallback time. Fixes: 1e39e5a32ad7 ("mptcp: infinite mapping sending") Fixes: 0348c690ed37 ("mptcp: add the fallback check") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 19 ++++++++++++++++--- net/mptcp/subflow.c | 2 +- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2a6351d55a7d..aead331866a0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -967,7 +967,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, goto reset; subflow->mp_capable = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); return false; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e6fcb61443dd..e63bc2bb7fff 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1245,7 +1245,7 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1d2d71711872..9860179bfd5e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -927,12 +927,25 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } -static inline void mptcp_do_fallback(struct sock *sk) +static inline void mptcp_do_fallback(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; + msk = mptcp_sk(sk); __mptcp_do_fallback(msk); + if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { + gfp_t saved_allocation = ssk->sk_allocation; + + /* we are in a atomic (BH) scope, override ssk default for data + * fin allocation + */ + ssk->sk_allocation = GFP_ATOMIC; + ssk->sk_shutdown |= SEND_SHUTDOWN; + tcp_shutdown(ssk, SEND_SHUTDOWN); + ssk->sk_allocation = saved_allocation; + } } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8dfea6a8a82f..b34b96fb742f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1279,7 +1279,7 @@ fallback: return false; } - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); -- cgit v1.2.3 From f745a3ebdfb9041d3a55e66eb345895cd8ecc90c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:39 -0700 Subject: mptcp: consistent map handling on failure When the MPTCP receive path reach a non fatal fall-back condition, e.g. when the MPC sockets must fall-back to TCP, the existing code is a little self-inconsistent: it reports that new data is available - return true - but sets the MPC flag to the opposite value. As the consequence read operations in some exceptional scenario may block unexpectedly. Address the issue setting the correct MPC read status. Additionally avoid some code duplication in the fatal fall-back scenario. Fixes: 9c81be0dbc89 ("mptcp: add MP_FAIL response support") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b34b96fb742f..03862103665d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1252,17 +1252,12 @@ fallback: subflow->send_mp_fail = 1; if (!READ_ONCE(msk->allow_infinite_fallback)) { - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; - tcp_send_active_reset(ssk, GFP_ATOMIC); - while ((skb = skb_peek(&ssk->sk_receive_queue))) - sk_eat_skb(ssk, skb); - } else { - mptcp_subflow_fail(msk, ssk); + goto reset; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + mptcp_subflow_fail(msk, ssk); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; } @@ -1270,10 +1265,14 @@ fallback: /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; + +reset: + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); return false; -- cgit v1.2.3 From 6aeed9045071f2252ff4e98fc13d1e304f33e5b0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:40 -0700 Subject: mptcp: fix race on unaccepted mptcp sockets When the listener socket owning the relevant request is closed, it frees the unaccepted subflows and that causes later deletion of the paired MPTCP sockets. The mptcp socket's worker can run in the time interval between such delete operations. When that happens, any access to msk->first will cause an UaF access, as the subflow cleanup did not cleared such field in the mptcp socket. Address the issue explicitly traversing the listener socket accept queue at close time and performing the needed cleanup on the pending msk. Note that the locking is a bit tricky, as we need to acquire the msk socket lock, while still owning the subflow socket one. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 +++++ net/mptcp/protocol.h | 2 ++ net/mptcp/subflow.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e63bc2bb7fff..e475212f2618 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2331,6 +2331,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { + tcp_set_state(ssk, TCP_CLOSE); + mptcp_subflow_queue_clean(ssk); + inet_csk_listen_stop(ssk); + } __tcp_close(ssk, 0); /* close acquired an extra ref */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9860179bfd5e..c14d70c036d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -306,6 +306,7 @@ struct mptcp_sock { u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + struct mptcp_sock *dl_next; }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -608,6 +609,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 03862103665d..63e8892ec807 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1723,6 +1723,58 @@ static void subflow_state_change(struct sock *sk) } } +void mptcp_subflow_queue_clean(struct sock *listener_ssk) +{ + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); + for (req = queue->rskq_accept_head; req; req = req->dl_next) { + struct mptcp_subflow_context *subflow; + struct sock *ssk = req->sk; + struct mptcp_sock *msk; + + if (!sk_is_mptcp(ssk)) + continue; + + subflow = mptcp_subflow_ctx(ssk); + if (!subflow || !subflow->conn) + continue; + + /* skip if already in list */ + msk = mptcp_sk(subflow->conn); + if (msk->dl_next || msk == head) + continue; + + msk->dl_next = head; + head = msk; + } + spin_unlock_bh(&queue->rskq_lock); + if (!head) + return; + + /* can't acquire the msk socket lock under the subflow one, + * or will cause ABBA deadlock + */ + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { + struct sock *sk = (struct sock *)msk; + bool slow; + + slow = lock_sock_fast_nested(sk); + next = msk->dl_next; + msk->first = NULL; + msk->dl_next = NULL; + unlock_sock_fast(sk, slow); + } + + /* we are still under the listener msk socket lock */ + lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); +} + static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.3 From 42fb6cddec3b306c9f6ef136b6438e0de1836431 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 27 Jun 2022 18:02:41 -0700 Subject: selftests: mptcp: more stable diag tests The mentioned test-case still use an hard-coded-len sleep to wait for a relative large number of connection to be established. On very slow VM and with debug build such timeout could be exceeded, causing failures in our CI. Address the issue polling for the expected condition several times, up to an unreasonable high amount of time. On reasonably fast system the self-tests will be faster then before, on very slow one we will still catch the correct condition. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 48 +++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 9dd43d7d957b..515859a5168b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -61,6 +61,39 @@ chk_msk_nr() __chk_nr "grep -c token:" $* } +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + printf "%-50s" "$msg" + if [ $i -ge $timeout ]; then + echo "[ fail ] timeout while expecting $expected max $max last $nr" + ret=$test_cnt + elif [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + chk_msk_fallback_nr() { __chk_nr "grep -c fallback" $* @@ -146,7 +179,7 @@ ip -n $ns link set dev lo up echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" @@ -155,7 +188,7 @@ chk_msk_listen 10000 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " @@ -167,13 +200,13 @@ flush_pids echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" @@ -184,7 +217,7 @@ for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -l -w 10 \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done wait_local_port_listen $ns $((NR_CLIENTS + 10001)) @@ -193,12 +226,11 @@ for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -w 10 \ + ./mptcp_connect -p $((I+10001)) -w 20 \ -t ${timeout_poll} 127.0.0.1 >/dev/null & done -sleep 1.5 -chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" flush_pids exit $ret -- cgit v1.2.3 From 06e445f740c1a0fe5d16b3dff8a4ef18e124e54e Mon Sep 17 00:00:00 2001 From: Ossama Othman Date: Mon, 27 Jun 2022 18:02:42 -0700 Subject: mptcp: fix conflict with Including before the C library header causes symbol redefinition errors at compile-time due to duplicate declarations and definitions in the header included by . Explicitly include before in when __KERNEL__ is not defined so that the C library compatibility logic in is enabled when including in user space code. Fixes: c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support") Signed-off-by: Ossama Othman Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 921963589904..dfe19bf13f4c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -2,16 +2,17 @@ #ifndef _UAPI_MPTCP_H #define _UAPI_MPTCP_H +#ifndef __KERNEL__ +#include /* for sockaddr_in and sockaddr_in6 */ +#include /* for struct sockaddr */ +#endif + #include #include #include /* for sockaddr_in */ #include /* for sockaddr_in6 */ #include /* for sockaddr_storage and sa_family */ -#ifndef __KERNEL__ -#include /* for struct sockaddr */ -#endif - #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) #define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) -- cgit v1.2.3 From fd37c2ecb21f7aee04ccca5f561469f07d00063c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 27 Jun 2022 18:02:43 -0700 Subject: selftests: mptcp: Initialize variables to quiet gcc 12 warnings In a few MPTCP selftest tools, gcc 12 complains that the 'sock' variable might be used uninitialized. This is a false positive because the only code path that could lead to uninitialized access is where getaddrinfo() fails, but the local xgetaddrinfo() wrapper exits if such a failure occurs. Initialize the 'sock' variable anyway to allow the tools to build with gcc 12. Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Acked-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 2 +- tools/testing/selftests/net/mptcp/mptcp_inq.c | 2 +- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8628aa61b763..e2ea6c126c99 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -265,7 +265,7 @@ static void sock_test_tcpulp(int sock, int proto, unsigned int line) static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 29f75e2a1116..8672d898f8cd 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -88,7 +88,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index ac9a4d9c1764..ae61f39556ca 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -136,7 +136,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, -- cgit v1.2.3 From adabdd8f6acabc0c3fdbba2e7f5a2edd9c5ef22d Mon Sep 17 00:00:00 2001 From: katrinzhou Date: Tue, 28 Jun 2022 11:50:30 +0800 Subject: ipv6/sit: fix ipip6_tunnel_get_prl return value When kcalloc fails, ipip6_tunnel_get_prl() should return -ENOMEM. Move the position of label "out" to return correctly. Addresses-Coverity: ("Unused value") Fixes: 300aaeeaab5f ("[IPV6] SIT: Add SIOCGETPRL ioctl to get/dump PRL.") Signed-off-by: katrinzhou Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220628035030.1039171-1-zys.zljxml@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c20992..6bcd5e419a08 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; - rcu_read_lock(); - ca = min(t->prl_count, cmax); if (!kp) { @@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u } } - c = 0; + rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; @@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u if (kprl.addr != htonl(INADDR_ANY)) break; } -out: + rcu_read_unlock(); len = sizeof(*kp) * c; @@ -362,7 +360,7 @@ out: ret = -EFAULT; kfree(kp); - +out: return ret; } -- cgit v1.2.3 From 53ad46169fe2996fe1b623ba6c9c4fa33847876f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 28 Jun 2022 11:31:34 +0800 Subject: net: ipv6: unexport __init-annotated seg6_hmac_net_init() As of commit 5801f064e351 ("net: ipv6: unexport __init-annotated seg6_hmac_init()"), EXPORT_SYMBOL and __init is a bad combination because the .init.text section is freed up after the initialization. Hence, modules cannot use symbols annotated __init. The access to a freed symbol may end up with kernel panic. This remove the EXPORT_SYMBOL to fix modpost warning: WARNING: modpost: vmlinux.o(___ksymtab+seg6_hmac_net_init+0x0): Section mismatch in reference from the variable __ksymtab_seg6_hmac_net_init to the function .init.text:seg6_hmac_net_init() The symbol seg6_hmac_net_init is exported and annotated __init Fix this by removing the __init annotation of seg6_hmac_net_init or drop the export. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Reported-by: Hulk Robot Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20220628033134.21088-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_hmac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 6de01185cc68..d43c50a7310d 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -406,7 +406,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return rhashtable_init(&sdata->hmac_infos, &rht_params); } -EXPORT_SYMBOL(seg6_hmac_net_init); void seg6_hmac_exit(void) { -- cgit v1.2.3 From 5a478a653b4cca148d5c89832f007ec0809d7e6d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 27 Jun 2022 14:40:48 +0200 Subject: nfc: nfcmrvl: Fix irq_of_parse_and_map() return value The irq_of_parse_and_map() returns 0 on failure, not a negative ERRNO. Reported-by: Lv Ruyi Fixes: caf6e49bf6d0 ("NFC: nfcmrvl: add spi driver") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220627124048.296253-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- drivers/nfc/nfcmrvl/i2c.c | 6 +++--- drivers/nfc/nfcmrvl/spi.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index ceef81d93ac9..01329b91d59d 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -167,9 +167,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index a38e2fcdfd39..ad3359a4942c 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -115,9 +115,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, } ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; -- cgit v1.2.3 From 00aff3590fc0a73bddd3b743863c14e76fd35c0c Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 29 Jun 2022 14:34:18 +0800 Subject: net: tipc: fix possible refcount leak in tipc_sk_create() Free sk in case tipc_sk_insert() fails. Signed-off-by: Hangyu Hua Reviewed-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 17f8c523e33b..43509c7e90fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } -- cgit v1.2.3 From eddd95b9423946aaacb55cac6a9b2cea8ab944fc Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 27 Jun 2022 19:06:42 +0200 Subject: NFC: nxp-nci: Don't issue a zero length i2c_master_read() There are packets which doesn't have a payload. In that case, the second i2c_master_read() will have a zero length. But because the NFC controller doesn't have any data left, it will NACK the I2C read and -ENXIO will be returned. In case there is no payload, just skip the second i2c master read. Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Signed-off-by: Michael Walle Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 7e451c10985d..e8f3b35afbee 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -162,6 +162,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE); + if (!header.plen) + return 0; + r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); if (r != header.plen) { nfc_err(&client->dev, -- cgit v1.2.3 From 9577fc5fdc8b07b891709af6453545db405e24ad Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 27 Jun 2022 19:06:43 +0200 Subject: NFC: nxp-nci: don't print header length mismatch on i2c error Don't print a misleading header length mismatch error if the i2c call returns an error. Instead just return the error code without any error message. Signed-off-by: Michael Walle Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index e8f3b35afbee..ae2ba08d8ac3 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -166,7 +168,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, return 0; r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); -- cgit v1.2.3 From 7b92aa9e613508cbaa29dd35bf27db4c35628b10 Mon Sep 17 00:00:00 2001 From: Coleman Dietsch Date: Tue, 28 Jun 2022 12:47:44 -0500 Subject: selftests net: fix kselftest net fatal error The incorrect path is causing the following error when trying to run net kselftests: In file included from bpf/nat6to4.c:43: ../../../lib/bpf/bpf_helpers.h:11:10: fatal error: 'bpf_helper_defs.h' file not found ^~~~~~~~~~~~~~~~~~~ 1 error generated. Fixes: cf67838c4422 ("selftests net: fix bpf build error") Signed-off-by: Coleman Dietsch Link: https://lore.kernel.org/r/20220628174744.7908-1-dietschc@csp.edu Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8a69c91fcca0..8ccaf8732eb2 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -2,7 +2,7 @@ CLANG ?= clang CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../lib +CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o -- cgit v1.2.3 From e65af5403e462ccd7dff6a045a886c64da598c2e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 28 Jun 2022 11:35:17 +0200 Subject: usbnet: fix memory allocation in helpers usbnet provides some helper functions that are also used in the context of reset() operations. During a reset the other drivers on a device are unable to operate. As that can be block drivers, a driver for another interface cannot use paging in its memory allocations without risking a deadlock. Use GFP_NOIO in the helpers. Fixes: 877bd862f32b8 ("usbnet: introduce usbnet 3 command helpers") Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20220628093517.7469-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1cb6dab3e2d0..e2135ab87a6e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2004,7 +2004,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (size) { - buf = kmalloc(size, GFP_KERNEL); + buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } @@ -2036,7 +2036,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (data) { - buf = kmemdup(data, size, GFP_KERNEL); + buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { -- cgit v1.2.3 From 1758bde2e4aa5ff188d53e7d9d388bbb7e12eebb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 28 Jun 2022 12:15:08 +0200 Subject: net: phy: Don't trigger state machine while in suspend Upon system sleep, mdio_bus_phy_suspend() stops the phy_state_machine(), but subsequent interrupts may retrigger it: They may have been left enabled to facilitate wakeup and are not quiesced until the ->suspend_noirq() phase. Unwanted interrupts may hence occur between mdio_bus_phy_suspend() and dpm_suspend_noirq(), as well as between dpm_resume_noirq() and mdio_bus_phy_resume(). Retriggering the phy_state_machine() through an interrupt is not only undesirable for the reason given in mdio_bus_phy_suspend() (freezing it midway with phydev->lock held), but also because the PHY may be inaccessible after it's suspended: Accesses to USB-attached PHYs are blocked once usb_suspend_both() clears the can_submit flag and PHYs on PCI network cards may become inaccessible upon suspend as well. Amend phy_interrupt() to avoid triggering the state machine if the PHY is suspended. Signal wakeup instead if the attached net_device or its parent has been configured as a wakeup source. (Those conditions are identical to mdio_bus_phy_may_suspend().) Postpone handling of the interrupt until the PHY has resumed. Before stopping the phy_state_machine() in mdio_bus_phy_suspend(), wait for a concurrent phy_interrupt() to run to completion. That is necessary because phy_interrupt() may have checked the PHY's suspend status before the system sleep transition commenced and it may thus retrigger the state machine after it was stopped. Likewise, after re-enabling interrupt handling in mdio_bus_phy_resume(), wait for a concurrent phy_interrupt() to complete to ensure that interrupts which it postponed are properly rerun. The issue was exposed by commit 1ce8b37241ed ("usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling"), but has existed since forever. Fixes: 541cd3ee00a4 ("phylib: Fix deadlock on resume") Link: https://lore.kernel.org/netdev/a5315a8a-32c2-962f-f696-de9a26d30091@samsung.com/ Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Lukas Wunner Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v2.6.33+ Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/b7f386d04e9b5b0e2738f0125743e30676f309ef.1656410895.git.lukas@wunner.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 23 +++++++++++++++++++++++ drivers/net/phy/phy_device.c | 23 +++++++++++++++++++++++ include/linux/phy.h | 6 ++++++ 3 files changed, 52 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef62f357b76d..8d3ee3a6495b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -976,6 +977,28 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) struct phy_driver *drv = phydev->drv; irqreturn_t ret; + /* Wakeup interrupts may occur during a system sleep transition. + * Postpone handling until the PHY has resumed. + */ + if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) { + struct net_device *netdev = phydev->attached_dev; + + if (netdev) { + struct device *parent = netdev->dev.parent; + + if (netdev->wol_enabled) + pm_system_wakeup(); + else if (device_may_wakeup(&netdev->dev)) + pm_wakeup_dev_event(&netdev->dev, 0, true); + else if (parent && device_may_wakeup(parent)) + pm_wakeup_dev_event(parent, 0, true); + } + + phydev->irq_rerun = 1; + disable_irq_nosync(irq); + return IRQ_HANDLED; + } + mutex_lock(&phydev->lock); ret = drv->handle_interrupt(phydev); mutex_unlock(&phydev->lock); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 431a8719c635..46acddd865a7 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -278,6 +278,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) if (phydev->mac_managed_pm) return 0; + /* Wakeup interrupts may occur during the system sleep transition when + * the PHY is inaccessible. Set flag to postpone handling until the PHY + * has resumed. Wait for concurrent interrupt handler to complete. + */ + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 1; + synchronize_irq(phydev->irq); + } + /* We must stop the state machine manually, otherwise it stops out of * control, possibly with the phydev->lock held. Upon resume, netdev * may call phy routines that try to grab the same lock, and that may @@ -315,6 +324,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) if (ret < 0) return ret; no_resume: + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 0; + synchronize_irq(phydev->irq); + + /* Rerun interrupts which were postponed by phy_interrupt() + * because they occurred during the system sleep transition. + */ + if (phydev->irq_rerun) { + phydev->irq_rerun = 0; + enable_irq(phydev->irq); + irq_wake_thread(phydev->irq, phydev); + } + } + if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..b09f7d36cff2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -572,6 +572,10 @@ struct macsec_ops; * @mdix_ctrl: User setting of crossover * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled + * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt + * handling shall be postponed until PHY has resumed + * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, + * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics @@ -626,6 +630,8 @@ struct phy_device { /* Interrupts are enabled */ unsigned interrupts:1; + unsigned irq_suspended:1; + unsigned irq_rerun:1; enum phy_state state; -- cgit v1.2.3 From fa152f626b24ec2ca3489100d8c5c0a0bce4e2ef Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 28 Jun 2022 13:43:49 +0200 Subject: net: phy: ax88772a: fix lost pause advertisement configuration In case of asix_ax88772a_link_change_notify() workaround, we run soft reset which will automatically clear MII_ADVERTISE configuration. The PHYlib framework do not know about changed configuration state of the PHY, so we need use phy_init_hw() to reinit PHY configuration. Fixes: dde258469257 ("net: usb/phy: asix: add support for ax88772A/C PHYs") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220628114349.3929928-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/ax88796b.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c index 457896337505..0f1e617a26c9 100644 --- a/drivers/net/phy/ax88796b.c +++ b/drivers/net/phy/ax88796b.c @@ -88,8 +88,10 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev) /* Reset PHY, otherwise MII_LPA will provide outdated information. * This issue is reproducible only with some link partner PHYs */ - if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset) - phydev->drv->soft_reset(phydev); + if (phydev->state == PHY_NOLINK) { + phy_init_hw(phydev); + phy_start_aneg(phydev); + } } static struct phy_driver asix_driver[] = { -- cgit v1.2.3 From 4e43e64d0f1332fcc503babad4dc31aead7131ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Jun 2022 12:12:48 +0000 Subject: ipv6: fix lockdep splat in in6_dump_addrs() As reported by syzbot, we should not use rcu_dereference() when rcu_read_lock() is not held. WARNING: suspicious RCU usage 5.19.0-rc2-syzkaller #0 Not tainted net/ipv6/addrconf.c:5175 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by syz-executor326/3617: #0: ffffffff8d5848e8 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0xae/0xc20 net/netlink/af_netlink.c:2223 stack backtrace: CPU: 0 PID: 3617 Comm: syz-executor326 Not tainted 5.19.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 in6_dump_addrs+0x12d1/0x1790 net/ipv6/addrconf.c:5175 inet6_dump_addr+0x9c1/0xb50 net/ipv6/addrconf.c:5300 netlink_dump+0x541/0xc20 net/netlink/af_netlink.c:2275 __netlink_dump_start+0x647/0x900 net/netlink/af_netlink.c:2380 netlink_dump_start include/linux/netlink.h:245 [inline] rtnetlink_rcv_msg+0x73e/0xc90 net/core/rtnetlink.c:6046 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 88e2ca308094 ("mld: convert ifmcaddr6 to RCU") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Taehee Yoo Link: https://lore.kernel.org/r/20220628121248.858695-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5864cbc30db6..49cc6587dd77 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5168,9 +5168,9 @@ next: fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = rcu_dereference(idev->mc_list); + for (ifmca = rtnl_dereference(idev->mc_list); ifmca; - ifmca = rcu_dereference(ifmca->next), ip_idx++) { + ifmca = rtnl_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); -- cgit v1.2.3 From 050133e1aa2cb49bb17be847d48a4431598ef562 Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Wed, 29 Jun 2022 04:29:14 +0300 Subject: net: bonding: fix use-after-free after 802.3ad slave unbind commit 0622cab0341c ("bonding: fix 802.3ad aggregator reselection"), resolve case, when there is several aggregation groups in the same bond. bond_3ad_unbind_slave will invalidate (clear) aggregator when __agg_active_ports return zero. So, ad_clear_agg can be executed even, when num_of_ports!=0. Than bond_3ad_unbind_slave can be executed again for, previously cleared aggregator. NOTE: at this time bond_3ad_unbind_slave will not update slave ports list, because lag_ports==NULL. So, here we got slave ports, pointing to freed aggregator memory. Fix with checking actual number of ports in group (as was before commit 0622cab0341c ("bonding: fix 802.3ad aggregator reselection") ), before ad_clear_agg(). The KASAN logs are as follows: [ 767.617392] ================================================================== [ 767.630776] BUG: KASAN: use-after-free in bond_3ad_state_machine_handler+0x13dc/0x1470 [ 767.638764] Read of size 2 at addr ffff00011ba9d430 by task kworker/u8:7/767 [ 767.647361] CPU: 3 PID: 767 Comm: kworker/u8:7 Tainted: G O 5.15.11 #15 [ 767.655329] Hardware name: DNI AmazonGo1 A7040 board (DT) [ 767.660760] Workqueue: lacp_1 bond_3ad_state_machine_handler [ 767.666468] Call trace: [ 767.668930] dump_backtrace+0x0/0x2d0 [ 767.672625] show_stack+0x24/0x30 [ 767.675965] dump_stack_lvl+0x68/0x84 [ 767.679659] print_address_description.constprop.0+0x74/0x2b8 [ 767.685451] kasan_report+0x1f0/0x260 [ 767.689148] __asan_load2+0x94/0xd0 [ 767.692667] bond_3ad_state_machine_handler+0x13dc/0x1470 Fixes: 0622cab0341c ("bonding: fix 802.3ad aggregator reselection") Co-developed-by: Maksym Glubokiy Signed-off-by: Maksym Glubokiy Signed-off-by: Yevhen Orlov Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20220629012914.361-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a86b1f71762e..d7fb33c078e8 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2228,7 +2228,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; - ad_clear_agg(temp_aggregator); + if (temp_aggregator->num_of_ports == 0) + ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ -- cgit v1.2.3 From f8ebb3ac881b17712e1d5967c97ab1806b16d3d6 Mon Sep 17 00:00:00 2001 From: Jose Alonso Date: Tue, 28 Jun 2022 12:13:02 -0300 Subject: net: usb: ax88179_178a: Fix packet receiving This patch corrects packet receiving in ax88179_rx_fixup. - problem observed: ifconfig shows allways a lot of 'RX Errors' while packets are received normally. This occurs because ax88179_rx_fixup does not recognise properly the usb urb received. The packets are normally processed and at the end, the code exits with 'return 0', generating RX Errors. (pkt_cnt==-2 and ptk_hdr over field rx_hdr trying to identify another packet there) This is a usb urb received by "tcpdump -i usbmon2 -X" on a little-endian CPU: 0x0000: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ^ packet 1 start (pkt_len = 0x05ec) ^^^^ IP alignment pseudo header ^ ethernet packet start last byte ethernet packet v padding (8-bytes aligned) vvvv vvvv 0x05e0: c92d d444 1420 8a69 83dd 272f e82b 9811 0x05f0: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... ^ packet 2 0x0be0: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... 0x1130: 9d41 9171 8a38 0ec5 eeee f8e3 3b19 87a0 ... 0x1720: 8cfc 15ff 5e4c e85c eeee f8e3 3b19 87a0 ... 0x1d10: ecfa 2a3a 19ab c78c eeee f8e3 3b19 87a0 ... 0x2070: eeee f8e3 3b19 87a0 94de 80e3 daac 0800 ... ^ packet 7 0x2120: 7c88 4ca5 5c57 7dcc 0d34 7577 f778 7e0a 0x2130: f032 e093 7489 0740 3008 ec05 0000 0080 ====1==== ====2==== hdr_off ^ pkt_len = 0x05ec ^^^^ AX_RXHDR_*=0x00830 ^^^^ ^ pkt_len = 0 ^^^^ AX_RXHDR_DROP_ERR=0x80000000 ^^^^ ^ 0x2140: 3008 ec05 0000 0080 3008 5805 0000 0080 0x2150: 3008 ec05 0000 0080 3008 ec05 0000 0080 0x2160: 3008 5803 0000 0080 3008 c800 0000 0080 ===11==== ===12==== ===13==== ===14==== 0x2170: 0000 0000 0e00 3821 ^^^^ ^^^^ rx_hdr ^^^^ pkt_cnt=14 ^^^^ hdr_off=0x2138 ^^^^ ^^^^ padding The dump shows that pkt_cnt is the number of entrys in the per-packet metadata. It is "2 * packet count". Each packet have two entrys. The first have a valid value (pkt_len and AX_RXHDR_*) and the second have a dummy-header 0x80000000 (pkt_len=0 with AX_RXHDR_DROP_ERR). Why exists dummy-header for each packet?!? My guess is that this was done probably to align the entry for each packet to 64-bits and maintain compatibility with old firmware. There is also a padding (0x00000000) before the rx_hdr to align the end of rx_hdr to 64-bit. Note that packets have a alignment of 64-bits (8-bytes). This patch assumes that the dummy-header and the last padding are optional. So it preserves semantics and recognises the same valid packets as the current code. This patch was made using only the dumpfile information and tested with only one device: 0b95:1790 ASIX Electronics Corp. AX88179 Gigabit Ethernet Fixes: 57bc3d3ae8c1 ("net: usb: ax88179_178a: Fix out-of-bounds accesses in RX fixup") Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Jose Alonso Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/d6970bb04bf67598af4d316eaeb1792040b18cfd.camel@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/ax88179_178a.c | 101 +++++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 25 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 4704ed6f00ef..ac2d400d1d6c 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1472,6 +1472,42 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) * are bundled into this buffer and where we can find an array of * per-packet metadata (which contains elements encoded into u16). */ + + /* SKB contents for current firmware: + * + * ... + * + * + * ... + * + * + * + * where: + * contains pkt_len bytes: + * 2 bytes of IP alignment pseudo header + * packet received + * contains 4 bytes: + * pkt_len and fields AX_RXHDR_* + * 0-7 bytes to terminate at + * 8 bytes boundary (64-bit). + * 4 bytes to make rx_hdr terminate at + * 8 bytes boundary (64-bit) + * contains 4 bytes: + * pkt_len=0 and AX_RXHDR_DROP_ERR + * contains 4 bytes: + * pkt_cnt and hdr_off (offset of + * ) + * + * pkt_cnt is number of entrys in the per-packet metadata. + * In current firmware there is 2 entrys per packet. + * The first points to the packet and the + * second is a dummy header. + * This was done probably to align fields in 64-bit and + * maintain compatibility with old firmware. + * This code assumes that and are + * optional. + */ + if (skb->len < 4) return 0; skb_trim(skb, skb->len - 4); @@ -1485,51 +1521,66 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Make sure that the bounds of the metadata array are inside the SKB * (and in front of the counter at the end). */ - if (pkt_cnt * 2 + hdr_off > skb->len) + if (pkt_cnt * 4 + hdr_off > skb->len) return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); /* Packets must not overlap the metadata array */ skb_trim(skb, hdr_off); - for (; ; pkt_cnt--, pkt_hdr++) { + for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) { + u16 pkt_len_plus_padd; u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; + pkt_len_plus_padd = (pkt_len + 7) & 0xfff8; - if (pkt_len > skb->len) + /* Skip dummy header used for alignment + */ + if (pkt_len == 0) + continue; + + if (pkt_len_plus_padd > skb->len) return 0; /* Check CRC or runt packet */ - if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && - pkt_len >= 2 + ETH_HLEN) { - bool last = (pkt_cnt == 0); - - if (last) { - ax_skb = skb; - } else { - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (!ax_skb) - return 0; - } - ax_skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - skb_set_tail_pointer(ax_skb, ax_skb->len); - ax_skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(ax_skb, pkt_hdr); + if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) || + pkt_len < 2 + ETH_HLEN) { + dev->net->stats.rx_errors++; + skb_pull(skb, pkt_len_plus_padd); + continue; + } - if (last) - return 1; + /* last packet */ + if (pkt_len_plus_padd == skb->len) { + skb_trim(skb, pkt_len); - usbnet_skb_return(dev, ax_skb); + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + + skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; } - /* Trim this packet away from the SKB */ - if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) return 0; + skb_trim(ax_skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + + skb->truesize = pkt_len_plus_padd + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); + + skb_pull(skb, pkt_len_plus_padd); } + + return 0; } static struct sk_buff * -- cgit v1.2.3 From 9cc02ede696272c5271a401e4f27c262359bc2f6 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 29 Jun 2022 08:26:40 +0800 Subject: net: rose: fix UAF bugs caused by timer handler There are UAF bugs in rose_heartbeat_expiry(), rose_timer_expiry() and rose_idletimer_expiry(). The root cause is that del_timer() could not stop the timer handler that is running and the refcount of sock is not managed properly. One of the UAF bugs is shown below: (thread 1) | (thread 2) | rose_bind | rose_connect | rose_start_heartbeat rose_release | (wait a time) case ROSE_STATE_0 | rose_destroy_socket | rose_heartbeat_expiry rose_stop_heartbeat | sock_put(sk) | ... sock_put(sk) // FREE | | bh_lock_sock(sk) // USE The sock is deallocated by sock_put() in rose_release() and then used by bh_lock_sock() in rose_heartbeat_expiry(). Although rose_destroy_socket() calls rose_stop_heartbeat(), it could not stop the timer that is running. The KASAN report triggered by POC is shown below: BUG: KASAN: use-after-free in _raw_spin_lock+0x5a/0x110 Write of size 4 at addr ffff88800ae59098 by task swapper/3/0 ... Call Trace: dump_stack_lvl+0xbf/0xee print_address_description+0x7b/0x440 print_report+0x101/0x230 ? irq_work_single+0xbb/0x140 ? _raw_spin_lock+0x5a/0x110 kasan_report+0xed/0x120 ? _raw_spin_lock+0x5a/0x110 kasan_check_range+0x2bd/0x2e0 _raw_spin_lock+0x5a/0x110 rose_heartbeat_expiry+0x39/0x370 ? rose_start_heartbeat+0xb0/0xb0 call_timer_fn+0x2d/0x1c0 ? rose_start_heartbeat+0xb0/0xb0 expire_timers+0x1f3/0x320 __run_timers+0x3ff/0x4d0 run_timer_softirq+0x41/0x80 __do_softirq+0x233/0x544 irq_exit_rcu+0x41/0xa0 sysvec_apic_timer_interrupt+0x8c/0xb0 asm_sysvec_apic_timer_interrupt+0x1b/0x20 RIP: 0010:default_idle+0xb/0x10 RSP: 0018:ffffc9000012fea0 EFLAGS: 00000202 RAX: 000000000000bcae RBX: ffff888006660f00 RCX: 000000000000bcae RDX: 0000000000000001 RSI: ffffffff843a11c0 RDI: ffffffff843a1180 RBP: dffffc0000000000 R08: dffffc0000000000 R09: ffffed100da36d46 R10: dfffe9100da36d47 R11: ffffffff83cf0950 R12: 0000000000000000 R13: 1ffff11000ccc1e0 R14: ffffffff8542af28 R15: dffffc0000000000 ... Allocated by task 146: __kasan_kmalloc+0xc4/0xf0 sk_prot_alloc+0xdd/0x1a0 sk_alloc+0x2d/0x4e0 rose_create+0x7b/0x330 __sock_create+0x2dd/0x640 __sys_socket+0xc7/0x270 __x64_sys_socket+0x71/0x80 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 152: kasan_set_track+0x4c/0x70 kasan_set_free_info+0x1f/0x40 ____kasan_slab_free+0x124/0x190 kfree+0xd3/0x270 __sk_destruct+0x314/0x460 rose_release+0x2fa/0x3b0 sock_close+0xcb/0x230 __fput+0x2d9/0x650 task_work_run+0xd6/0x160 exit_to_user_mode_loop+0xc7/0xd0 exit_to_user_mode_prepare+0x4e/0x80 syscall_exit_to_user_mode+0x20/0x40 do_syscall_64+0x4f/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 This patch adds refcount of sock when we use functions such as rose_start_heartbeat() and so on to start timer, and decreases the refcount of sock when timer is finished or deleted by functions such as rose_stop_heartbeat() and so on. As a result, the UAF bugs could be mitigated. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Duoming Zhou Tested-by: Duoming Zhou Link: https://lore.kernel.org/r/20220629002640.5693-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/rose/rose_timer.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552..f06ddbed3fed 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } -- cgit v1.2.3 From 665030fd0c1ed9f505932e6e73e7a2c788787a0a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 29 Jun 2022 10:02:05 +0300 Subject: mlxsw: spectrum_router: Fix rollback in tunnel next hop init In mlxsw_sp_nexthop6_init(), a next hop is always added to the router linked list, and mlxsw_sp_nexthop_type_init() is invoked afterwards. When that function results in an error, the next hop will not have been removed from the linked list. As the error is propagated upwards and the caller frees the next hop object, the linked list ends up holding an invalid object. A similar issue comes up with mlxsw_sp_nexthop4_init(), where rollback block does exist, however does not include the linked list removal. Both IPv6 and IPv4 next hops have a similar issue with next-hop counter rollbacks. As these were introduced in the same patchset as the next hop linked list, include the cleanup in this patch. Fixes: dbe4598c1e92 ("mlxsw: spectrum_router: Keep nexthops in a linked list") Fixes: a5390278a5eb ("mlxsw: spectrum: Add support for setting counters on nexthops") Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220629070205.803952-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9dbb573d53ea..0d8a0068e4ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4415,6 +4415,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop_neigh_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } @@ -6740,6 +6742,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct net_device *dev = rt->fib6_nh->fib_nh_dev; + int err; nh->nhgi = nh_grp->nhgi; nh->nh_weight = rt->fib6_nh->fib_nh_weight; @@ -6755,7 +6758,16 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_type_init; + + return 0; + +err_nexthop_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, -- cgit v1.2.3 From 0a18d802d65cf662644fd1d369c86d84a5630652 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Wed, 29 Jun 2022 15:55:50 +0800 Subject: net: sfp: fix memory leak in sfp_probe() sfp_probe() allocates a memory chunk from sfp with sfp_alloc(). When devm_add_action() fails, sfp is not freed, which leads to a memory leak. We should use devm_add_action_or_reset() instead of devm_add_action(). Signed-off-by: Jianglei Nie Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20220629075550.2152003-1-niejianglei2021@163.com Signed-off-by: Paolo Abeni --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 9a5d5a10560f..e7b0e12cc75b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2516,7 +2516,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; -- cgit v1.2.3 From 9c5de246c1dbe785268fc2e83c88624b92e4ec93 Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Thu, 30 Jun 2022 14:22:26 +0200 Subject: net: sparx5: mdb add/del handle non-sparx5 devices When adding/deleting mdb entries on other net_devices, eg., tap interfaces, it should not crash. Fixes: 3bacfccdcb2d ("net: sparx5: Add mdb handlers") Signed-off-by: Casper Andersson Reviewed-by: Steen Hegelund Link: https://lore.kernel.org/r/20220630122226.316812-1-casper.casan@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index 3429660cd2e5..5edc8b7176c8 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -396,6 +396,9 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev, u32 mact_entry; int res, err; + if (!sparx5_netdevice_check(dev)) + return -EOPNOTSUPP; + if (netif_is_bridge_master(v->obj.orig_dev)) { sparx5_mact_learn(spx5, PGID_CPU, v->addr, v->vid); return 0; @@ -466,6 +469,9 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev, u32 mact_entry, res, pgid_entry[3]; int err; + if (!sparx5_netdevice_check(dev)) + return -EOPNOTSUPP; + if (netif_is_bridge_master(v->obj.orig_dev)) { sparx5_mact_forget(spx5, v->addr, v->vid); return 0; -- cgit v1.2.3 From ff1fa2081d173b01cebe2fbf0a2d0f1cee9ce4b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Jun 2022 11:19:10 -0700 Subject: net: tun: avoid disabling NAPI twice Eric reports that syzbot made short work out of my speculative fix. Indeed when queue gets detached its tfile->tun remains, so we would try to stop NAPI twice with a detach(), close() sequence. Alternative fix would be to move tun_napi_disable() to tun_detach_all() and let the NAPI run after the queue has been detached. Fixes: a8fc8cb5692a ("net: tun: stop NAPI when detaching queues") Reported-by: syzbot Reported-by: Eric Dumazet Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220629181911.372047-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e2eb35887394..259b2b84b2b3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -640,7 +640,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tfile); + if (!tfile->detached) + tun_napi_disable(tfile); tun_napi_del(tfile); } -- cgit v1.2.3 From 839b92fede7ba308f1a475aa00fea55f63b7fccf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Jun 2022 11:19:11 -0700 Subject: selftest: tun: add test for NAPI dismantle Being lazy does not pay, add the test for various ordering of tun queue close / detach / destroy. Link: https://lore.kernel.org/r/20220629181911.372047-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tun.c | 162 +++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/tun.c diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7ea54af55490..ddad703ace34 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -54,7 +54,7 @@ TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c new file mode 100644 index 000000000000..fa83918b62d1 --- /dev/null +++ b/tools/testing/selftests/net/tun.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +static int tun_attach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_ATTACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_detach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_DETACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_alloc(char *dev) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + fprintf(stderr, "can't open tun: %s\n", strerror(errno)); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err < 0) { + fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); + close(fd); + return err; + } + strcpy(dev, ifr.ifr_name); + return fd; +} + +static int tun_delete(char *dev) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + unsigned char data[64]; + } req; + struct rtattr *rta; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_DELLINK; + + req.ifm.ifi_family = AF_UNSPEC; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); + rta->rta_type = IFLA_IFNAME; + rta->rta_len = RTA_LENGTH(IFNAMSIZ); + req.nh.nlmsg_len += rta->rta_len; + memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "can't send: %s\n", strerror(errno)); + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +FIXTURE(tun) +{ + char ifname[IFNAMSIZ]; + int fd, fd2; +}; + +FIXTURE_SETUP(tun) +{ + memset(self->ifname, 0, sizeof(self->ifname)); + + self->fd = tun_alloc(self->ifname); + ASSERT_GE(self->fd, 0); + + self->fd2 = tun_alloc(self->ifname); + ASSERT_GE(self->fd2, 0); +} + +FIXTURE_TEARDOWN(tun) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fd2 >= 0) + close(self->fd2); +} + +TEST_F(tun, delete_detach_close) { + EXPECT_EQ(tun_delete(self->ifname), 0); + EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); + EXPECT_EQ(errno, 22); +} + +TEST_F(tun, detach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, detach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 58bf4db695287c4bb2a5fc9fc12c78fdd4c36894 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 29 Jun 2022 21:30:07 +0300 Subject: net: dsa: felix: fix race between reading PSFP stats and port stats Both PSFP stats and the port stats read by ocelot_check_stats_work() are indirectly read through the same mechanism - write to STAT_CFG:STAT_VIEW, read from SYS:STAT:CNT[n]. It's just that for port stats, we write STAT_VIEW with the index of the port, and for PSFP stats, we write STAT_VIEW with the filter index. So if we allow them to run concurrently, ocelot_check_stats_work() may change the view from vsc9959_psfp_counters_get(), and vice versa. Fixes: 7d4b564d6add ("net: dsa: felix: support psfp filter on vsc9959") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220629183007.3808130-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 570d0204b7be..9c27b9b0128d 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1886,6 +1886,8 @@ static void vsc9959_psfp_sgi_table_del(struct ocelot *ocelot, static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, struct felix_stream_filter_counters *counters) { + mutex_lock(&ocelot->stats_lock); + ocelot_rmw(ocelot, SYS_STAT_CFG_STAT_VIEW(index), SYS_STAT_CFG_STAT_VIEW_M, SYS_STAT_CFG); @@ -1900,6 +1902,8 @@ static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, SYS_STAT_CFG_STAT_VIEW(index) | SYS_STAT_CFG_STAT_CLEAR_SHOT(0x10), SYS_STAT_CFG); + + mutex_unlock(&ocelot->stats_lock); } static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, -- cgit v1.2.3 From 868f9f2f8e004bfe0d3935b1976f625b2924893b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 30 Jun 2022 22:58:49 +0300 Subject: vfs: fix copy_file_range() regression in cross-fs copies A regression has been reported by Nicolas Boichat, found while using the copy_file_range syscall to copy a tracefs file. Before commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the kernel would return -EXDEV to userspace when trying to copy a file across different filesystems. After this commit, the syscall doesn't fail anymore and instead returns zero (zero bytes copied), as this file's content is generated on-the-fly and thus reports a size of zero. Another regression has been reported by He Zhe - the assertion of WARN_ON_ONCE(ret == -EOPNOTSUPP) can be triggered from userspace when copying from a sysfs file whose read operation may return -EOPNOTSUPP. Since we do not have test coverage for copy_file_range() between any two types of filesystems, the best way to avoid these sort of issues in the future is for the kernel to be more picky about filesystems that are allowed to do copy_file_range(). This patch restores some cross-filesystem copy restrictions that existed prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices"), namely, cross-sb copy is not allowed for filesystems that do not implement ->copy_file_range(). Filesystems that do implement ->copy_file_range() have full control of the result - if this method returns an error, the error is returned to the user. Before this change this was only true for fs that did not implement the ->remap_file_range() operation (i.e. nfsv3). Filesystems that do not implement ->copy_file_range() still fall-back to the generic_copy_file_range() implementation when the copy is within the same sb. This helps the kernel can maintain a more consistent story about which filesystems support copy_file_range(). nfsd and ksmbd servers are modified to fall-back to the generic_copy_file_range() implementation in case vfs_copy_file_range() fails with -EOPNOTSUPP or -EXDEV, which preserves behavior of server-side-copy. fall-back to generic_copy_file_range() is not implemented for the smb operation FSCTL_DUPLICATE_EXTENTS_TO_FILE, which is arguably a correct change of behavior. Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/ Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/ Link: https://lore.kernel.org/linux-fsdevel/20210630161320.29006-1-lhenriques@suse.de/ Reported-by: Nicolas Boichat Reported-by: kernel test robot Signed-off-by: Luis Henriques Fixes: 64bf5ff58dff ("vfs: no fallback for ->copy_file_range") Link: https://lore.kernel.org/linux-fsdevel/20f17f64-88cb-4e80-07c1-85cb96c83619@windriver.com/ Reported-by: He Zhe Tested-by: Namjae Jeon Tested-by: Luis Henriques Signed-off-by: Amir Goldstein Signed-off-by: Linus Torvalds --- fs/ksmbd/smb2pdu.c | 16 +++++++++--- fs/ksmbd/vfs.c | 4 +++ fs/nfsd/vfs.c | 8 +++++- fs/read_write.c | 77 +++++++++++++++++++++++++++++++----------------------- 4 files changed, 68 insertions(+), 37 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 94ab1dcd80e7..353f047e783c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7810,14 +7810,24 @@ int smb2_ioctl(struct ksmbd_work *work) src_off = le64_to_cpu(dup_ext->SourceFileOffset); dst_off = le64_to_cpu(dup_ext->TargetFileOffset); length = le64_to_cpu(dup_ext->ByteCount); - cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, - dst_off, length, 0); + /* + * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE + * should fall back to vfs_copy_file_range(). This could be + * beneficial when re-exporting nfs/smb mount, but note that + * this can result in partial copy that returns an error status. + * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, + * fall back to vfs_copy_file_range(), should be avoided when + * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. + */ + cloned = vfs_clone_file_range(fp_in->filp, src_off, + fp_out->filp, dst_off, length, 0); if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { ret = -EOPNOTSUPP; goto dup_ext_out; } else if (cloned != length) { cloned = vfs_copy_file_range(fp_in->filp, src_off, - fp_out->filp, dst_off, length, 0); + fp_out->filp, dst_off, + length, 0); if (cloned != length) { if (cloned < 0) ret = cloned; diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 5d185564aef6..05efcdf7a4a7 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1779,6 +1779,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, + len, 0); if (ret < 0) return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 840e3af63a6f..b764213bcc55 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -577,6 +577,7 @@ out_err: ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -587,7 +588,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/read_write.c b/fs/read_write.c index b1b1cdfee9d3..e0777eefd846 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1397,28 +1397,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* - * Although we now allow filesystems to handle cross sb copy, passing - * a file of the wrong filesystem type to filesystem driver can result - * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. NFS defines - * several different file_system_type structures, but they all end up - * using the same ->copy_file_range() function pointer. - */ - if (file_out->f_op->copy_file_range && - file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) - return file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - - return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); -} - /* * Performs necessary checks before doing a file copy * @@ -1440,6 +1418,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* + * We allow some filesystems to handle cross sb copy, but passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. + * + * nfs and cifs define several different file_system_type structures + * and several different sets of file_operations, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range) { + if (file_in->f_op->copy_file_range != + file_out->f_op->copy_file_range) + return -EXDEV; + } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { + return -EXDEV; + } + /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; @@ -1505,26 +1501,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). + * Cloning is supported by more file systems, so we implement copy on + * same sb using clone, but for filesystems where both clone and copy + * are supported (e.g. nfs,cifs), we only call the copy method. */ + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + goto done; + } + if (file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { - loff_t cloned; - - cloned = file_in->f_op->remap_file_range(file_in, pos_in, + ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); - if (cloned > 0) { - ret = cloned; + if (ret > 0) goto done; - } } - ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); - WARN_ON_ONCE(ret == -EOPNOTSUPP); + /* + * We can get here for same sb copy of filesystems that do not implement + * ->copy_file_range() in case filesystem does not support clone or in + * case filesystem supports clone but rejected the clone request (e.g. + * because it was not block aligned). + * + * In both cases, fall back to kernel copy so we are able to maintain a + * consistent story about which filesystems support copy_file_range() + * and which filesystems do not, that will allow userspace tools to + * make consistent desicions w.r.t using copy_file_range(). + */ + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + done: if (ret > 0) { fsnotify_access(file_in); -- cgit v1.2.3