From b57c1a1e7effab067a65bab54c5d83a67cffd043 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 4 Jul 2023 18:18:08 +0800 Subject: EDAC/synopsys: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Michal Simek Link: https://lore.kernel.org/r/20230704101811.49637-3-frank.li@vivo.com --- drivers/edac/synopsys_edac.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 709babce43ba..5527055b0964 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -1324,11 +1324,9 @@ static int mc_probe(struct platform_device *pdev) struct synps_edac_priv *priv; struct mem_ctl_info *mci; void __iomem *baseaddr; - struct resource *res; int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - baseaddr = devm_ioremap_resource(&pdev->dev, res); + baseaddr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(baseaddr)) return PTR_ERR(baseaddr); -- cgit v1.2.3 From 3f3174996be6b4312c38f54d5969f5d5b75fec9e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 22 Jan 2024 22:13:59 -0600 Subject: RAS: Introduce AMD Address Translation Library AMD Zen-based systems report memory errors through Machine Check banks representing Unified Memory Controllers (UMCs). The address value reported for DRAM ECC errors is a "normalized address" that is relative to the UMC. This normalized address must be converted to a system physical address to be usable by the OS. Support for this address translation was introduced to the MCA subsystem with Zen1 systems. The code was later moved to the AMD64 EDAC module, since this was the only user of the code at the time. However, there are uses for this translation outside of EDAC. The system physical address can be used in MCA for preemptive page offlining as done in some MCA notifier functions. 
Also, this translation is needed as the basis of similar functionality needed for some CXL configurations on AMD systems. Introduce a common address translation library that can be used for multiple subsystems including MCA, EDAC, and CXL. Include support for UMC normalized to system physical address translation for current CPU systems. The Data Fabric Indirect register access offsets and one of the register fields were changed. Default to the current offsets and register field definition. And fallback to the older values if running on a "legacy" system. Provide built-in code to facilitate the loading and unloading of the library module without affecting other modules or built-in code. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240123041401.79812-2-yazen.ghannam@amd.com --- MAINTAINERS | 6 + drivers/ras/Kconfig | 1 + drivers/ras/Makefile | 2 + drivers/ras/amd/atl/Kconfig | 20 ++ drivers/ras/amd/atl/Makefile | 18 ++ drivers/ras/amd/atl/access.c | 106 ++++++ drivers/ras/amd/atl/core.c | 225 +++++++++++++ drivers/ras/amd/atl/dehash.c | 407 +++++++++++++++++++++++ drivers/ras/amd/atl/denormalize.c | 617 +++++++++++++++++++++++++++++++++++ drivers/ras/amd/atl/internal.h | 297 +++++++++++++++++ drivers/ras/amd/atl/map.c | 665 ++++++++++++++++++++++++++++++++++++++ drivers/ras/amd/atl/reg_fields.h | 603 ++++++++++++++++++++++++++++++++++ drivers/ras/amd/atl/system.c | 281 ++++++++++++++++ drivers/ras/amd/atl/umc.c | 41 +++ drivers/ras/ras.c | 31 ++ include/linux/ras.h | 16 + 16 files changed, 3336 insertions(+) create mode 100644 drivers/ras/amd/atl/Kconfig create mode 100644 drivers/ras/amd/atl/Makefile create mode 100644 drivers/ras/amd/atl/access.c create mode 100644 drivers/ras/amd/atl/core.c create mode 100644 drivers/ras/amd/atl/dehash.c create mode 100644 drivers/ras/amd/atl/denormalize.c create mode 100644 drivers/ras/amd/atl/internal.h create mode 100644 drivers/ras/amd/atl/map.c create mode 100644 
drivers/ras/amd/atl/reg_fields.h create mode 100644 drivers/ras/amd/atl/system.c create mode 100644 drivers/ras/amd/atl/umc.c (limited to 'drivers') diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..25537a37338e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -897,6 +897,12 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/ F: drivers/infiniband/hw/efa/ F: include/uapi/rdma/efa-abi.h +AMD ADDRESS TRANSLATION LIBRARY (ATL) +M: Yazen Ghannam +L: linux-edac@vger.kernel.org +S: Supported +F: drivers/ras/amd/atl/* + AMD AXI W1 DRIVER M: Kris Chaplin R: Thomas Delev diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index c2a236f2e846..2e969f59c0ca 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -32,5 +32,6 @@ menuconfig RAS if RAS source "arch/x86/ras/Kconfig" +source "drivers/ras/amd/atl/Kconfig" endif diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 6f0404f50107..3fac80f58005 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -2,3 +2,5 @@ obj-$(CONFIG_RAS) += ras.o obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o + +obj-y += amd/atl/ diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig new file mode 100644 index 000000000000..a43513a700f1 --- /dev/null +++ b/drivers/ras/amd/atl/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Kconfig +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam + +config AMD_ATL + tristate "AMD Address Translation Library" + depends on AMD_NB && X86_64 && RAS + default N + help + This library includes support for implementation-specific + address translation procedures needed for various error + handling cases. + + Enable this option if using DRAM ECC on Zen-based systems + and OS-based error handling. 
diff --git a/drivers/ras/amd/atl/Makefile b/drivers/ras/amd/atl/Makefile new file mode 100644 index 000000000000..4acd5f05bd9c --- /dev/null +++ b/drivers/ras/amd/atl/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Makefile +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam + +amd_atl-y := access.o +amd_atl-y += core.o +amd_atl-y += dehash.o +amd_atl-y += denormalize.o +amd_atl-y += map.o +amd_atl-y += system.o +amd_atl-y += umc.o + +obj-$(CONFIG_AMD_ATL) += amd_atl.o diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c new file mode 100644 index 000000000000..f6dd87bb2c35 --- /dev/null +++ b/drivers/ras/amd/atl/access.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * access.c : DF Indirect Access functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* Protect the PCI config register pairs used for DF indirect access. */ +static DEFINE_MUTEX(df_indirect_mutex); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): there are FICAD + * low and high registers but so far only the low register is needed. + * + * Use Instance Id 0xFF to indicate a broadcast read. 
+ */ +#define DF_BROADCAST 0xFF + +#define DF_FICAA_INST_EN BIT(0) +#define DF_FICAA_REG_NUM GENMASK(10, 1) +#define DF_FICAA_FUNC_NUM GENMASK(13, 11) +#define DF_FICAA_INST_ID GENMASK(23, 16) + +#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) + +static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; + struct pci_dev *F4; + int err = -ENODEV; + u32 ficaa = 0; + + if (node >= amd_nb_num()) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + /* Enable instance-specific access. */ + if (instance_id != DF_BROADCAST) { + ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1); + ficaa |= FIELD_PREP(DF_FICAA_INST_ID, instance_id); + } + + /* + * The two least-significant bits are masked when inputting the + * register offset to FICAA. + */ + reg >>= 2; + + if (df_cfg.flags.legacy_ficaa) { + ficaa_addr = 0x5C; + ficad_addr = 0x98; + + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM_LEGACY, reg); + } else { + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, reg); + } + + ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, func); + + mutex_lock(&df_indirect_mutex); + + err = pci_write_config_dword(F4, ficaa_addr, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, ficad_addr, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + + pr_debug("node=%u inst=0x%x func=0x%x reg=0x%x val=0x%x", + node, instance_id, func, reg << 2, *lo); + +out_unlock: + mutex_unlock(&df_indirect_mutex); + +out: + return err; +} + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + return __df_indirect_read(node, func, reg, instance_id, lo); +} + +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +{ + return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); +} diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c new file mode 100644 index 
000000000000..6dc4e06305f7 --- /dev/null +++ b/drivers/ras/amd/atl/core.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * core.c : Module init and base translation functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include <linux/module.h> +#include <asm/cpu_device_id.h> + +#include "internal.h" + +struct df_config df_cfg __read_mostly; + +static int addr_over_limit(struct addr_ctx *ctx) +{ + u64 dram_limit_addr; + + if (df_cfg.rev >= DF4) + dram_limit_addr = FIELD_GET(DF4_DRAM_LIMIT_ADDR, ctx->map.limit); + else + dram_limit_addr = FIELD_GET(DF2_DRAM_LIMIT_ADDR, ctx->map.limit); + + dram_limit_addr <<= DF_DRAM_BASE_LIMIT_LSB; + dram_limit_addr |= GENMASK(DF_DRAM_BASE_LIMIT_LSB - 1, 0); + + /* Is calculated system address above DRAM limit address? */ + if (ctx->ret_addr > dram_limit_addr) { + atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)", + ctx->ret_addr, dram_limit_addr); + return -EINVAL; + } + + return 0; +} + +static bool legacy_hole_en(struct addr_ctx *ctx) +{ + u32 reg = ctx->map.base; + + if (df_cfg.rev >= DF4) + reg = ctx->map.ctl; + + return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); +} + +static int add_legacy_hole(struct addr_ctx *ctx) +{ + u32 dram_hole_base; + u8 func = 0; + + if (!legacy_hole_en(ctx)) + return 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) + return -EINVAL; + + dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + if (ctx->ret_addr >= dram_hole_base) + ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + + return 0; +} + +static u64 get_base_addr(struct addr_ctx *ctx) +{ + u64 base_addr; + + if (df_cfg.rev >= DF4) + base_addr = FIELD_GET(DF4_BASE_ADDR, ctx->map.base); + else + base_addr = FIELD_GET(DF2_BASE_ADDR, ctx->map.base); + + return base_addr << DF_DRAM_BASE_LIMIT_LSB; +} + +static int add_base_and_hole(struct addr_ctx *ctx) +{ + 
ctx->ret_addr += get_base_addr(ctx); + + if (add_legacy_hole(ctx)) + return -EINVAL; + + return 0; +} + +static bool late_hole_remove(struct addr_ctx *ctx) +{ + if (df_cfg.rev == DF3p5) + return true; + + if (df_cfg.rev == DF4) + return true; + + if (ctx->map.intlv_mode == DF3_6CHAN) + return true; + + return false; +} + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr) +{ + struct addr_ctx ctx; + + if (df_cfg.rev == UNKNOWN) + return -EINVAL; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = addr; + ctx.inst_id = coh_st_inst_id; + + ctx.inputs.norm_addr = addr; + ctx.inputs.socket_id = socket_id; + ctx.inputs.die_id = die_id; + ctx.inputs.coh_st_inst_id = coh_st_inst_id; + + if (determine_node_id(&ctx, socket_id, die_id)) + return -EINVAL; + + if (get_address_map(&ctx)) + return -EINVAL; + + if (denormalize_address(&ctx)) + return -EINVAL; + + if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (dehash_address(&ctx)) + return -EINVAL; + + if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (addr_over_limit(&ctx)) + return -EINVAL; + + return ctx.ret_addr; +} + +static void check_for_legacy_df_access(void) +{ + /* + * All Zen-based systems before Family 19h use the legacy + * DF Indirect Access (FICAA/FICAD) offsets. + */ + if (boot_cpu_data.x86 < 0x19) { + df_cfg.flags.legacy_ficaa = true; + return; + } + + /* All systems after Family 19h use the current offsets. */ + if (boot_cpu_data.x86 > 0x19) + return; + + /* Some Family 19h systems use the legacy offsets. */ + switch (boot_cpu_data.x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + df_cfg.flags.legacy_ficaa = true; + } +} + +/* + * This library provides functionality for AMD-based systems with a Data Fabric. 
+ * The set of systems with a Data Fabric is equivalent to the set of Zen-based systems + * and the set of systems with the Scalable MCA feature at this time. However, these + * are technically independent things. + * + * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be + * an ever expanding list. Instead, match on the SMCA and Zen features to cover all + * relevant systems. + */ +static const struct x86_cpu_id amd_atl_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_SMCA, NULL), + X86_MATCH_FEATURE(X86_FEATURE_ZEN, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids); + +static int __init amd_atl_init(void) +{ + if (!x86_match_cpu(amd_atl_cpuids)) + return -ENODEV; + + if (!amd_nb_num()) + return -ENODEV; + + check_for_legacy_df_access(); + + if (get_df_system_info()) + return -ENODEV; + + /* Increment this module's refcount so that it can't be easily unloaded. */ + __module_get(THIS_MODULE); + amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr); + + pr_info("AMD Address Translation Library initialized"); + return 0; +} + +/* + * Exit function is only needed for testing and debug. Module unload must be + * forced to override refcount check. + */ +static void __exit amd_atl_exit(void) +{ + amd_atl_unregister_decoder(); +} + +module_init(amd_atl_init); +module_exit(amd_atl_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c new file mode 100644 index 000000000000..6f414926e6fe --- /dev/null +++ b/drivers/ras/amd/atl/dehash.c @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * dehash.c : Functions to account for hashing bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * Verify the interleave bits are correct in the different interleaving + * settings. 
+ * + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the + * respective interleaving is disabled. + */ +static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, + u8 num_intlv_dies, u8 num_intlv_sockets) +{ + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return false; + } + + if (ctx->map.num_intlv_dies > num_intlv_dies) { + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); + return false; + } + + if (ctx->map.num_intlv_sockets > num_intlv_sockets) { + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); + return false; + } + + return true; +} + +static int df2_dehash_addr(struct addr_ctx *ctx) +{ + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(12), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df3_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= 
FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + /* Calculation complete for 2 channels. Continue for 4 and 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + return 0; +} + +static int df3_6chan_dehash_addr(struct addr_ctx *ctx) +{ + u8 intlv_bit_pos = ctx->map.intlv_bit_pos; + u8 hashed_bit, intlv_bit, num_intlv_bits; + bool hash_ctl_2M, hash_ctl_1G; + + if (ctx->map.intlv_mode != DF3_6CHAN) { + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + num_intlv_bits = ilog2(ctx->map.num_intlv_chan) + 1; + + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= !!(BIT_ULL(intlv_bit_pos + num_intlv_bits) & ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + 
ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df4_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (ctx->map.num_intlv_sockets == 1) + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + + /* + * Hashing is possible with socket interleaving, so check the total number + * of channels in the system rather than DRAM map interleaving mode. + * + * Calculation complete for 2 channels. Continue for 4, 8, and 16 channels. 
+ */ + if (ctx->map.total_intlv_chan <= 2) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 and 16 channels. */ + if (ctx->map.total_intlv_chan <= 4) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + /* Calculation complete for 8 channels. Continue for 16 channels. */ + if (ctx->map.total_intlv_chan <= 8) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + + return 0; +} + +static int df4p5_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + u8 hashed_bit, intlv_bit; + u64 rehash_vector; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl); + + /* + * Generate a unique address to determine which bits + * need to be dehashed. 
+ * + * Start with a contiguous bitmask for the total + * number of channels starting at bit 8. + * + * Then make a gap in the proper place based on + * interleave mode. + */ + rehash_vector = ctx->map.total_intlv_chan - 1; + rehash_vector <<= 8; + + if (ctx->map.intlv_mode == DF4p5_NPS2_4CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_8CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_16CHAN_1K_HASH) + rehash_vector = expand_bits(10, 2, rehash_vector); + else + rehash_vector = expand_bits(9, 3, rehash_vector); + + if (rehash_vector & BIT_ULL(8)) { + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + } + + if (rehash_vector & BIT_ULL(9)) { + intlv_bit = FIELD_GET(BIT_ULL(9), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(9); + } + + if (rehash_vector & BIT_ULL(12)) { + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + } + + if (rehash_vector & BIT_ULL(13)) { + intlv_bit = FIELD_GET(BIT_ULL(13), 
ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + } + + if (rehash_vector & BIT_ULL(14)) { + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(20), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(25), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(34), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(44), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + } + + return 0; +} + +int dehash_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + /* No hashing cases. */ + case NONE: + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + /* Hashing bits handled earlier during CS ID calculation. */ + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + /* No hash physical address bits, so nothing to do. 
*/ + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 0; + + case DF2_2CHAN_HASH: + return df2_dehash_addr(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + return df3_dehash_addr(ctx); + + case DF3_6CHAN: + return df3_6chan_dehash_addr(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + return df4_dehash_addr(ctx); + + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return df4p5_dehash_addr(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } +} diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c new file mode 100644 index 000000000000..01f1d0fb6799 --- /dev/null +++ b/drivers/ras/amd/atl/denormalize.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * denormalize.c : Functions to account for interleaving bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * Returns the Destination Fabric ID. This is the first (lowest) + * COH_ST Fabric ID used within a DRAM Address map. 
+ */ +static u16 get_dst_fabric_id(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit); + case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit); + case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl); + case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl); + default: + atl_debug_on_bad_df_rev(); + return 0; + } +} + +/* + * Make a contiguous gap in address for N bits starting at bit P. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0] + * [23 : 11][10 : 8][7 : 0] + */ +static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx) +{ + return expand_bits(ctx->map.intlv_bit_pos, + ctx->map.total_intlv_bits, + ctx->ret_addr); +} + +/* + * Make two gaps in address for N bits. + * First gap is a single bit at bit P. + * Second gap is the remaining N-1 bits at bit 12. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * First gap + * expanded address bits: [20+1 : p+1][p][p-1 : 0] + * [21 : 9][8][7 : 0] + * + * Second gap uses result from first. + * r = n - 1; remaining interleave bits + * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0] + * [23 : 14][13 : 12][11 : 0] + */ +static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) +{ + /* Make a single space at the interleave bit. */ + u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr); + + /* Done if there's only a single interleave bit. */ + if (ctx->map.total_intlv_bits <= 1) + return denorm_addr; + + /* Make spaces for the remaining interleave bits starting at bit 12. 
*/ + return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); +} + +/* + * Take the current calculated address and shift enough bits in the middle + * to make a gap where the interleave bits will be inserted. + */ +static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF2_2CHAN_HASH: + return make_space_for_coh_st_id_at_intlv_bit(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return make_space_for_coh_st_id_split_2_1(ctx); + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +static u16 get_coh_st_id_df2(struct addr_ctx *ctx) +{ + u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies); + u8 num_intlv_bits; + u16 coh_st_id, mask; + + coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + /* Channel interleave bits */ + num_intlv_bits = order_base_2(ctx->map.num_intlv_chan); + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Die interleave bits */ + if (num_die_intlv_bits) { + u16 die_bits; + + mask = GENMASK(num_die_intlv_bits - 1, 0); + die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask; + die_bits >>= df_cfg.die_id_shift; + + coh_st_id |= (die_bits & mask) << num_intlv_bits; + num_intlv_bits += num_die_intlv_bits; + } + + /* Socket interleave bits */ + if (num_socket_intlv_bits) { + u16 socket_bits; + + mask = GENMASK(num_socket_intlv_bits - 1, 0); + socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; + socket_bits >>= df_cfg.socket_id_shift; + + coh_st_id |= (socket_bits & mask) << 
num_intlv_bits; + } + + return coh_st_id; +} + +static u16 get_coh_st_id_df4(struct addr_ctx *ctx) +{ + /* + * Start with the original component mask and the number of interleave + * bits for the channels in this map. + */ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u16 mask = df_cfg.component_id_mask; + + u16 socket_bits; + + /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */ + u16 coh_st_id = ctx->coh_st_fabric_id & mask; + + /* + * Subtract the "base" Destination Fabric ID. + * This accounts for systems with disabled Coherent Stations. + */ + coh_st_id -= get_dst_fabric_id(ctx) & mask; + + /* + * Generate and use a new mask based on the number of bits + * needed for channel interleaving in this map. + */ + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Done if socket interleaving is not enabled. */ + if (ctx->map.num_intlv_sockets <= 1) + return coh_st_id; + + /* + * Figure out how many bits are needed for the number of + * interleaved sockets. And shift the derived Coherent Station ID to account + * for these. + */ + num_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + coh_st_id <<= num_intlv_bits; + + /* Generate a new mask for the socket interleaving bits. */ + mask = GENMASK(num_intlv_bits - 1, 0); + + /* Get the socket interleave bits from the original Coherent Station Fabric ID. */ + socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift; + + /* Apply the appropriate socket bits to the derived Coherent Station ID. */ + coh_st_id |= socket_bits & mask; + + return coh_st_id; +} + +/* + * Derive the correct Coherent Station ID that represents the interleave bits + * used within the system physical address. This accounts for the + * interleave mode, number of interleaved channels/dies/sockets, and + * other system/mode-specific bit swizzling. + * + * Returns: Coherent Station ID on success. + * All bits set on error. 
+ */ +static u16 calculate_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF2_2CHAN_HASH: + return get_coh_st_id_df2(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return get_coh_st_id_df4(ctx); + + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0; + } +} + +static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos); +} + +static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[0] at the interleave bit. */ + denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos; + + /* Insert coh_st_id[2:1] at bit 12. */ + denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11; + + return denorm_addr; +} + +static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[1:0] at bit 8. */ + denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8; + + /* + * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3. + * Grab both because bit 3 will be clear if unused. 
+ */ + denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10; + + return denorm_addr; +} + +static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF2_2CHAN_HASH: + return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id); + + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u16 component_id, log_fabric_id; + + /* Start with the physical COH_ST Fabric ID. */ + u16 phys_fabric_id = ctx->coh_st_fabric_id; + + /* Skip logical ID lookup if remapping is disabled. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && + ctx->map.intlv_mode != DF3_6CHAN) + return phys_fabric_id; + + /* Mask off the Node ID bits to get the "local" Component ID. */ + component_id = phys_fabric_id & df_cfg.component_id_mask; + + /* + * Search the list of logical Component IDs for the one that + * matches this physical Component ID. 
+ */ + for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) { + if (ctx->map.remap_array[log_fabric_id] == component_id) + break; + } + + if (log_fabric_id == MAX_COH_ST_CHANNELS) + atl_debug(ctx, "COH_ST remap entry not found for 0x%x", + log_fabric_id); + + /* Get the Node ID bits from the physical and apply to the logical. */ + return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; +} + +static int denorm_addr_common(struct addr_ctx *ctx) +{ + u64 denorm_addr; + u16 coh_st_id; + + /* + * Convert the original physical COH_ST Fabric ID to a logical value. + * This is required for non-power-of-two and other interleaving modes. + */ + ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx); + + denorm_addr = make_space_for_coh_st_id(ctx); + coh_st_id = calculate_coh_st_id(ctx); + ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id); + return 0; +} + +static int denorm_addr_df3_6chan(struct addr_ctx *ctx) +{ + u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask; + u8 total_intlv_bits = ctx->map.total_intlv_bits; + u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos; + u64 msb_intlv_bits, temp_addr_a, temp_addr_b; + u8 np2_bits = ctx->map.np2_bits; + + if (ctx->map.intlv_mode != DF3_6CHAN) + return -EINVAL; + + /* + * 'np2_bits' holds the number of bits needed to cover the + * amount of memory (rounded up) in this map using 64K chunks. + * + * Example: + * Total memory in map: 6GB + * Rounded up to next power-of-2: 8GB + * Number of 64K chunks: 0x20000 + * np2_bits = log2(# of chunks): 17 + * + * Get the two most-significant interleave bits from the + * input address based on the following: + * + * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits] + */ + low_bit = 14 + np2_bits - total_intlv_bits; + msb_intlv_bits = ctx->ret_addr >> low_bit; + msb_intlv_bits &= 0x3; + + /* + * If MSB are 11b, then logical COH_ST ID is 6 or 7. + * Need to adjust based on the mod3 result. 
+ */ + if (msb_intlv_bits == 3) { + u8 addr_mod, phys_addr_msb, msb_coh_st_id; + + /* Get the remaining interleave bits from the input address. */ + temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + + /* Calculate the logical COH_ST offset based on mod3. */ + addr_mod = temp_addr_b % 3; + + /* Get COH_ST ID bits [2:1]. */ + msb_coh_st_id = (coh_st_id >> 1) & 0x3; + + /* Get the bit that starts the physical address bits. */ + phys_addr_msb = (intlv_bit + np2_bits + 1); + phys_addr_msb &= BIT(0); + phys_addr_msb++; + phys_addr_msb *= 3 - addr_mod + msb_coh_st_id; + phys_addr_msb %= 3; + + /* Move the physical address MSB to the correct place. */ + temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit); + + /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */ + coh_st_id &= BIT(0); + coh_st_id |= GENMASK(2, 1); + } else { + temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + } + + temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr; + temp_addr_b <<= intlv_bit + total_intlv_bits; + + ctx->ret_addr = temp_addr_a | temp_addr_b; + ctx->ret_addr |= coh_st_id << intlv_bit; + return 0; +} + +static int denorm_addr_df4_np2(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u16 group, group_offset, log_coh_st_offset; + unsigned int mod_value, shift_value; + u16 mask = df_cfg.component_id_mask; + u64 temp_addr_a, temp_addr_b; + u8 hash_pa8, hashed_bit; + + switch (ctx->map.intlv_mode) { + case DF4_NPS4_3CHAN_HASH: + mod_value = 3; + shift_value = 13; + break; + case DF4_NPS2_6CHAN_HASH: + mod_value = 3; + shift_value = 12; + break; + case DF4_NPS1_12CHAN_HASH: + mod_value = 3; + shift_value = 11; + break; + case DF4_NPS2_5CHAN_HASH: + mod_value = 5; + shift_value = 13; + break; + case DF4_NPS1_10CHAN_HASH: + mod_value = 5; + shift_value = 12; + break; + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + }; + 
+ if (ctx->map.num_intlv_sockets == 1) { + hash_pa8 = !!(BIT_ULL(shift_value) & ctx->ret_addr); + temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); + } else { + hash_pa8 = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask); + hash_pa8 >>= df_cfg.socket_id_shift; + temp_addr_a = ctx->ret_addr; + } + + /* Make a gap for the real bit [8]. */ + temp_addr_a = expand_bits(8, 1, temp_addr_a); + + /* Make an additional gap for bits [13:12], as appropriate.*/ + if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) { + temp_addr_a = expand_bits(13, 1, temp_addr_a); + } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) { + temp_addr_a = expand_bits(12, 2, temp_addr_a); + } + + /* Keep bits [13:0]. */ + temp_addr_a &= GENMASK_ULL(13, 0); + + /* Get the appropriate high bits. */ + shift_value += 1 - ilog2(ctx->map.num_intlv_sockets); + temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr; + temp_addr_b >>= shift_value; + temp_addr_b *= mod_value; + + /* + * Coherent Stations are divided into groups. + * + * Multiples of 3 (mod3) are divided into quadrants. + * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8] + * [3, 4, 5] [9, 10, 11] + * + * Multiples of 5 (mod5) are divided into sides. + * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] + */ + + /* + * Calculate the logical offset for the COH_ST within its DRAM Address map. + * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then + * log_coh_st_offset = 8 - 5 = 3 + */ + log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask); + + /* + * Figure out the group number. + * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group = 3 / 5 = 0 + */ + group = log_coh_st_offset / mod_value; + + /* + * Figure out the offset within the group. 
+ * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group_offset = 3 % 5 = 3 + */ + group_offset = log_coh_st_offset % mod_value; + + /* Adjust group_offset if the hashed bit [8] is set. */ + if (hash_pa8) { + if (!group_offset) + group_offset = mod_value - 1; + else + group_offset--; + } + + /* Add in the group offset to the high bits. */ + temp_addr_b += group_offset; + + /* Shift the high bits to the proper starting position. */ + temp_addr_b <<= 14; + + /* Combine the high and low bits together. */ + ctx->ret_addr = temp_addr_a | temp_addr_b; + + /* Account for hashing here instead of in dehash_address(). */ + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + hashed_bit = !!hash_pa8; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 8; + + /* Done for 3 and 5 channel. */ + if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH) + return 0; + + /* Select the proper 'group' bit to use for Bit 13. */ + if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) + hashed_bit = !!(group & BIT(1)); + else + hashed_bit = group & BIT(0); + + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 13; + + /* Done for 6 and 10 channel. 
*/ + if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH) + return 0; + + hashed_bit = group & BIT(0); + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 12; + return 0; +} + +int denormalize_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 0; + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + return denorm_addr_df4_np2(ctx); + case DF3_6CHAN: + return denorm_addr_df3_6chan(ctx); + default: + return denorm_addr_common(ctx); + } +} diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h new file mode 100644 index 000000000000..13f1b6098c96 --- /dev/null +++ b/drivers/ras/amd/atl/internal.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * internal.h : Helper functions and common defines + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#ifndef __AMD_ATL_INTERNAL_H__ +#define __AMD_ATL_INTERNAL_H__ + +#include +#include +#include + +#include + +#include "reg_fields.h" + +/* Maximum possible number of Coherent Stations within a single Data Fabric. */ +#define MAX_COH_ST_CHANNELS 32 + +/* PCI ID for Zen4 Server DF Function 0. */ +#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 + +/* Shift needed for adjusting register values to true values. */ +#define DF_DRAM_BASE_LIMIT_LSB 28 + +enum df_revisions { + UNKNOWN, + DF2, + DF3, + DF3p5, + DF4, + DF4p5, +}; + +/* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. 
*/ +enum intlv_modes { + NONE = 0x00, + NOHASH_2CHAN = 0x01, + NOHASH_4CHAN = 0x03, + NOHASH_8CHAN = 0x05, + DF3_6CHAN = 0x06, + NOHASH_16CHAN = 0x07, + NOHASH_32CHAN = 0x08, + DF3_COD4_2CHAN_HASH = 0x0C, + DF3_COD2_4CHAN_HASH = 0x0D, + DF3_COD1_8CHAN_HASH = 0x0E, + DF4_NPS4_2CHAN_HASH = 0x10, + DF4_NPS2_4CHAN_HASH = 0x11, + DF4_NPS1_8CHAN_HASH = 0x12, + DF4_NPS4_3CHAN_HASH = 0x13, + DF4_NPS2_6CHAN_HASH = 0x14, + DF4_NPS1_12CHAN_HASH = 0x15, + DF4_NPS2_5CHAN_HASH = 0x16, + DF4_NPS1_10CHAN_HASH = 0x17, + DF2_2CHAN_HASH = 0x21, + /* DF4.5 modes are all IntLvNumChan + 0x20 */ + DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, + DF4p5_NPS0_24CHAN_1K_HASH = 0x2E, + DF4p5_NPS4_2CHAN_1K_HASH = 0x30, + DF4p5_NPS2_4CHAN_1K_HASH = 0x31, + DF4p5_NPS1_8CHAN_1K_HASH = 0x32, + DF4p5_NPS4_3CHAN_1K_HASH = 0x33, + DF4p5_NPS2_6CHAN_1K_HASH = 0x34, + DF4p5_NPS1_12CHAN_1K_HASH = 0x35, + DF4p5_NPS2_5CHAN_1K_HASH = 0x36, + DF4p5_NPS1_10CHAN_1K_HASH = 0x37, + DF4p5_NPS4_2CHAN_2K_HASH = 0x40, + DF4p5_NPS2_4CHAN_2K_HASH = 0x41, + DF4p5_NPS1_8CHAN_2K_HASH = 0x42, + DF4p5_NPS1_16CHAN_2K_HASH = 0x43, + DF4p5_NPS4_3CHAN_2K_HASH = 0x44, + DF4p5_NPS2_6CHAN_2K_HASH = 0x45, + DF4p5_NPS1_12CHAN_2K_HASH = 0x46, + DF4p5_NPS0_24CHAN_2K_HASH = 0x47, + DF4p5_NPS2_5CHAN_2K_HASH = 0x48, + DF4p5_NPS1_10CHAN_2K_HASH = 0x49, +}; + +struct df_flags { + __u8 legacy_ficaa : 1, + socket_id_shift_quirk : 1, + __reserved_0 : 6; +}; + +struct df_config { + enum df_revisions rev; + + /* + * These masks operate on the 16-bit Coherent Station IDs, + * e.g. Instance, Fabric, Destination, etc. + */ + u16 component_id_mask; + u16 die_id_mask; + u16 node_id_mask; + u16 socket_id_mask; + + /* + * Least-significant bit of Node ID portion of the + * system-wide Coherent Station Fabric ID. + */ + u8 node_id_shift; + + /* + * Least-significant bit of Die portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. 
+ */ + u8 die_id_shift; + + /* + * Least-significant bit of Socket portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. + */ + u8 socket_id_shift; + + /* Number of DRAM Address maps visible in a Coherent Station. */ + u8 num_coh_st_maps; + + /* Global flags to handle special cases. */ + struct df_flags flags; +}; + +extern struct df_config df_cfg; + +struct dram_addr_map { + /* + * Each DRAM Address Map can operate independently + * in different interleaving modes. + */ + enum intlv_modes intlv_mode; + + /* System-wide number for this address map. */ + u8 num; + + /* Raw register values */ + u32 base; + u32 limit; + u32 ctl; + u32 intlv; + + /* + * Logical to Physical Coherent Station Remapping array + * + * Index: Logical Coherent Station Instance ID + * Value: Physical Coherent Station Instance ID + * + * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id] + */ + u8 remap_array[MAX_COH_ST_CHANNELS]; + + /* + * Number of bits covering DRAM Address map 0 + * when interleaving is non-power-of-2. + * + * Used only for DF3_6CHAN. + */ + u8 np2_bits; + + /* Position of the 'interleave bit'. */ + u8 intlv_bit_pos; + /* Number of channels interleaved in this map. */ + u8 num_intlv_chan; + /* Number of dies interleaved in this map. */ + u8 num_intlv_dies; + /* Number of sockets interleaved in this map. */ + u8 num_intlv_sockets; + /* + * Total number of channels interleaved accounting + * for die and socket interleaving. + */ + u8 total_intlv_chan; + /* Total bits needed to cover 'total_intlv_chan'. */ + u8 total_intlv_bits; +}; + +/* Original input values cached for debug printing. */ +struct addr_ctx_inputs { + u64 norm_addr; + u8 socket_id; + u8 die_id; + u8 coh_st_inst_id; +}; + +struct addr_ctx { + u64 ret_addr; + + struct addr_ctx_inputs inputs; + struct dram_addr_map map; + + /* AMD Node ID calculated from Socket and Die IDs. 
*/ + u8 node_id; + + /* + * Coherent Station Instance ID + * Local ID used within a 'node'. + */ + u16 inst_id; + + /* + * Coherent Station Fabric ID + * System-wide ID that includes 'node' bits. + */ + u16 coh_st_fabric_id; +}; + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); + +int get_df_system_info(void); +int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); + +int get_address_map(struct addr_ctx *ctx); + +int denormalize_address(struct addr_ctx *ctx); +int dehash_address(struct addr_ctx *ctx); + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); + +/* + * Make a gap in @data that is @num_bits long starting at @bit_num. + * e.g. data = 11111111'b + * bit_num = 3 + * num_bits = 2 + * result = 1111100111'b + */ +static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data) +{ + u64 temp1, temp2; + + if (!num_bits) + return data; + + if (!bit_num) { + WARN_ON_ONCE(num_bits >= BITS_PER_LONG); + return data << num_bits; + } + + WARN_ON_ONCE(bit_num >= BITS_PER_LONG); + + temp1 = data & GENMASK_ULL(bit_num - 1, 0); + + temp2 = data & GENMASK_ULL(63, bit_num); + temp2 <<= num_bits; + + return temp1 | temp2; +} + +/* + * Remove bits in @data between @low_bit and @high_bit inclusive. + * e.g. data = XXXYYZZZ'b + * low_bit = 3 + * high_bit = 4 + * result = XXXZZZ'b + */ +static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data) +{ + u64 temp1, temp2; + + WARN_ON_ONCE(high_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit > high_bit); + + if (!low_bit) + return data >> (high_bit + 1); + + temp1 = GENMASK_ULL(low_bit - 1, 0) & data; + temp2 = GENMASK_ULL(63, high_bit + 1) & data; + temp2 >>= high_bit - low_bit + 1; + + return temp1 | temp2; +} + +#define atl_debug(ctx, fmt, arg...) 
\ + pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\ + (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\ + (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg) + +static inline void atl_debug_on_bad_df_rev(void) +{ + pr_debug("Unrecognized DF rev: %u", df_cfg.rev); +} + +static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) +{ + atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); +} + +#endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c new file mode 100644 index 000000000000..33f549b6255a --- /dev/null +++ b/drivers/ras/amd/atl/map.c @@ -0,0 +1,665 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * map.c : Functions to read and decode DRAM address maps + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +static int df2_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF2_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == 8) + ctx->map.intlv_mode = DF2_2CHAN_HASH; + + if (ctx->map.intlv_mode != NONE && + ctx->map.intlv_mode != NOHASH_2CHAN && + ctx->map.intlv_mode != DF2_2CHAN_HASH) + return -EINVAL; + + return 0; +} + +static int df3_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3_INTLV_NUM_CHAN, ctx->map.base); + return 0; +} + +static int df3p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3p5_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == DF3_6CHAN) + return -EINVAL; + + return 0; +} + +static int df4_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH || + ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH || + ctx->map.intlv_mode == DF3_COD1_8CHAN_HASH || + ctx->map.intlv_mode == DF3_6CHAN) 
+ return -EINVAL; + + return 0; +} + +static int df4p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4p5_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode <= NOHASH_32CHAN) + return 0; + + /* + * Modes matching the ranges above are returned as-is. + * + * All other modes are "fixed up" by adding 20h to make a unique value. + */ + ctx->map.intlv_mode += 0x20; + + return 0; +} + +static int get_intlv_mode(struct addr_ctx *ctx) +{ + int ret; + + switch (df_cfg.rev) { + case DF2: + ret = df2_get_intlv_mode(ctx); + break; + case DF3: + ret = df3_get_intlv_mode(ctx); + break; + case DF3p5: + ret = df3p5_get_intlv_mode(ctx); + break; + case DF4: + ret = df4_get_intlv_mode(ctx); + break; + case DF4p5: + ret = df4p5_get_intlv_mode(ctx); + break; + default: + ret = -EINVAL; + } + + if (ret) + atl_debug_on_bad_df_rev(); + + return ret; +} + +static u64 get_hi_addr_offset(u32 reg_dram_offset) +{ + u8 shift = DF_DRAM_BASE_LIMIT_LSB; + u64 hi_addr_offset; + + switch (df_cfg.rev) { + case DF2: + hi_addr_offset = FIELD_GET(DF2_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF3: + case DF3p5: + hi_addr_offset = FIELD_GET(DF3_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF4: + case DF4p5: + hi_addr_offset = FIELD_GET(DF4_HI_ADDR_OFFSET, reg_dram_offset); + break; + default: + hi_addr_offset = 0; + atl_debug_on_bad_df_rev(); + } + + return hi_addr_offset << shift; +} + +/* + * Returns: 0 if offset is disabled. + * 1 if offset is enabled. + * -EINVAL on error. + */ +static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u32 reg_dram_offset; + u8 map_num; + + /* Should not be called for map 0. */ + if (!ctx->map.num) { + atl_debug(ctx, "Trying to find DRAM offset for map 0"); + return -EINVAL; + } + + /* + * DramOffset registers don't exist for map 0, so the base register + * actually refers to map 1. + * Adjust the map_num for the register offsets. 
+ */ + map_num = ctx->map.num - 1; + + if (df_cfg.rev >= DF4) { + /* Read D18F7x140 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x140 + (4 * map_num), + ctx->inst_id, &reg_dram_offset)) + return -EINVAL; + + } else { + /* Read D18F0x1B4 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x1B4 + (4 * map_num), + ctx->inst_id, &reg_dram_offset)) + return -EINVAL; + } + + if (!FIELD_GET(DF_HI_ADDR_OFFSET_EN, reg_dram_offset)) + return 0; + + *norm_offset = get_hi_addr_offset(reg_dram_offset); + + return 1; +} + +static int df3_6ch_get_dram_addr_map(struct addr_ctx *ctx) +{ + u16 dst_fabric_id = FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + u8 i, j, shift = 4, mask = 0xF; + u32 reg, offset = 0x60; + u16 dst_node_id; + + /* Get Socket 1 register. */ + if (dst_fabric_id & df_cfg.socket_id_mask) + offset = 0x68; + + /* Read D18F0x06{0,8} (DF::Skt0CsTargetRemap0)/(DF::Skt0CsTargetRemap1) */ + if (df_indirect_read_broadcast(ctx->node_id, 0, offset, &reg)) + return -EINVAL; + + /* Save 8 remap entries. */ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (reg >> (j * shift)) & mask; + + dst_node_id = dst_fabric_id & df_cfg.node_id_mask; + dst_node_id >>= df_cfg.node_id_shift; + + /* Read D18F2x090 (DF::Np2ChannelConfig) */ + if (df_indirect_read_broadcast(dst_node_id, 2, 0x90, &reg)) + return -EINVAL; + + ctx->map.np2_bits = FIELD_GET(DF_LOG2_ADDR_64K_SPACE0, reg); + return 0; +} + +static int df2_get_dram_addr_map(struct addr_ctx *ctx) +{ + /* Read D18F0x110 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x110 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F0x114 (DramLimitAddress). 
*/ + if (df_indirect_read_instance(ctx->node_id, 0, 0x114 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + return 0; +} + +static int df3_get_dram_addr_map(struct addr_ctx *ctx) +{ + if (df2_get_dram_addr_map(ctx)) + return -EINVAL; + + /* Read D18F0x3F8 (DfGlobalCtl). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x3F8, + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + return 0; +} + +static int df4_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 4, mask = 0xF; + u32 remap_reg; + + /* Read D18F7xE00 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE00 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7xE04 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE04 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7xE08 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE08 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7xE0C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE0C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 8 remap entries. */ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). 
*/ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 8 remap entries. */ + for (i = 8, j = 0; i < 16; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int df4p5_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 5, mask = 0x1F; + u32 remap_reg; + + /* Read D18F7x200 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x200 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7x204 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x204 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7x208 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x208 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7x20C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x20C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4p5_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 6 remap entries. */ + for (i = 0, j = 0; i < 6; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. 
*/ + for (i = 6, j = 0; i < 12; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x188 (CsTargetRemap0C). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x188 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. */ + for (i = 12, j = 0; i < 18; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int get_dram_addr_map(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return df2_get_dram_addr_map(ctx); + case DF3: + case DF3p5: return df3_get_dram_addr_map(ctx); + case DF4: return df4_get_dram_addr_map(ctx); + case DF4p5: return df4p5_get_dram_addr_map(ctx); + default: + atl_debug_on_bad_df_rev(); + return -EINVAL; + } +} + +static int get_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u32 reg; + + /* Read D18F0x50 (FabricBlockInstanceInformation3). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, &reg)) + return -EINVAL; + + if (df_cfg.rev < DF4p5) + ctx->coh_st_fabric_id = FIELD_GET(DF2_COH_ST_FABRIC_ID, reg); + else + ctx->coh_st_fabric_id = FIELD_GET(DF4p5_COH_ST_FABRIC_ID, reg); + + return 0; +} + +static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u64 last_offset = 0; + int ret; + + for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) { + ret = get_dram_offset(ctx, norm_offset); + if (ret < 0) + return ret; + + /* Continue search if this map's offset is not enabled. */ + if (!ret) + continue; + + /* Enabled offsets should never be 0. */ + if (*norm_offset == 0) { + atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num); + return -EINVAL; + } + + /* Offsets should always increase from one map to the next. 
*/ + if (*norm_offset <= last_offset) { + atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)", + ctx->map.num, *norm_offset, last_offset); + return -EINVAL; + } + + /* Match if this map's offset is less than the current calculated address. */ + if (ctx->ret_addr >= *norm_offset) + break; + + last_offset = *norm_offset; + } + + /* + * Finished search without finding a match. + * Reset to map 0 and no offset. + */ + if (ctx->map.num >= df_cfg.num_coh_st_maps) { + ctx->map.num = 0; + *norm_offset = 0; + } + + return 0; +} + +static bool valid_map(struct addr_ctx *ctx) +{ + if (df_cfg.rev >= DF4) + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.ctl); + else + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.base); +} + +static int get_address_map_common(struct addr_ctx *ctx) +{ + u64 norm_offset = 0; + + if (get_coh_st_fabric_id(ctx)) + return -EINVAL; + + if (find_normalized_offset(ctx, &norm_offset)) + return -EINVAL; + + if (get_dram_addr_map(ctx)) + return -EINVAL; + + if (!valid_map(ctx)) + return -EINVAL; + + ctx->ret_addr -= norm_offset; + + return 0; +} + +static u8 get_num_intlv_chan(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 1; + case NOHASH_2CHAN: + case DF2_2CHAN_HASH: + case DF3_COD4_2CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + return 2; + case DF4_NPS4_3CHAN_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + return 3; + case NOHASH_4CHAN: + case DF3_COD2_4CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + return 4; + case DF4_NPS2_5CHAN_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 5; + case DF3_6CHAN: + case DF4_NPS2_6CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + return 6; + case NOHASH_8CHAN: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case 
DF4p5_NPS1_8CHAN_2K_HASH: + return 8; + case DF4_NPS1_10CHAN_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + return 10; + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + return 12; + case NOHASH_16CHAN: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return 16; + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + return 24; + case NOHASH_32CHAN: + return 32; + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } +} + +static void calculate_intlv_bits(struct addr_ctx *ctx) +{ + ctx->map.num_intlv_chan = get_num_intlv_chan(ctx); + + ctx->map.total_intlv_chan = ctx->map.num_intlv_chan; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_dies; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_sockets; + + /* + * Get the number of bits needed to cover this many channels. + * order_base_2() rounds up automatically. + */ + ctx->map.total_intlv_bits = order_base_2(ctx->map.total_intlv_chan); +} + +static u8 get_intlv_bit_pos(struct addr_ctx *ctx) +{ + u8 addr_sel = 0; + + switch (df_cfg.rev) { + case DF2: + addr_sel = FIELD_GET(DF2_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF3: + case DF3p5: + addr_sel = FIELD_GET(DF3_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF4: + case DF4p5: + addr_sel = FIELD_GET(DF4_INTLV_ADDR_SEL, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Add '8' to get the 'interleave bit position'. 
*/ + return addr_sel + 8; +} + +static u8 get_num_intlv_dies(struct addr_ctx *ctx) +{ + u8 dies = 0; + + switch (df_cfg.rev) { + case DF2: + dies = FIELD_GET(DF2_INTLV_NUM_DIES, ctx->map.limit); + break; + case DF3: + dies = FIELD_GET(DF3_INTLV_NUM_DIES, ctx->map.base); + break; + case DF3p5: + dies = FIELD_GET(DF3p5_INTLV_NUM_DIES, ctx->map.base); + break; + case DF4: + case DF4p5: + dies = FIELD_GET(DF4_INTLV_NUM_DIES, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 die, 1 -> 2 dies, etc. */ + return 1 << dies; +} + +static u8 get_num_intlv_sockets(struct addr_ctx *ctx) +{ + u8 sockets = 0; + + switch (df_cfg.rev) { + case DF2: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.limit); + break; + case DF3: + case DF3p5: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.base); + break; + case DF4: + case DF4p5: + sockets = FIELD_GET(DF4_INTLV_NUM_SOCKETS, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 sockets, 1 -> 2 sockets, etc. 
*/ + return 1 << sockets; +} + +static int get_global_map_data(struct addr_ctx *ctx) +{ + if (get_intlv_mode(ctx)) + return -EINVAL; + + if (ctx->map.intlv_mode == DF3_6CHAN && + df3_6ch_get_dram_addr_map(ctx)) + return -EINVAL; + + ctx->map.intlv_bit_pos = get_intlv_bit_pos(ctx); + ctx->map.num_intlv_dies = get_num_intlv_dies(ctx); + ctx->map.num_intlv_sockets = get_num_intlv_sockets(ctx); + calculate_intlv_bits(ctx); + + return 0; +} + +static void dump_address_map(struct dram_addr_map *map) +{ + u8 i; + + pr_debug("intlv_mode=0x%x", map->intlv_mode); + pr_debug("num=0x%x", map->num); + pr_debug("base=0x%x", map->base); + pr_debug("limit=0x%x", map->limit); + pr_debug("ctl=0x%x", map->ctl); + pr_debug("intlv=0x%x", map->intlv); + + for (i = 0; i < MAX_COH_ST_CHANNELS; i++) + pr_debug("remap_array[%u]=0x%x", i, map->remap_array[i]); + + pr_debug("intlv_bit_pos=%u", map->intlv_bit_pos); + pr_debug("num_intlv_chan=%u", map->num_intlv_chan); + pr_debug("num_intlv_dies=%u", map->num_intlv_dies); + pr_debug("num_intlv_sockets=%u", map->num_intlv_sockets); + pr_debug("total_intlv_chan=%u", map->total_intlv_chan); + pr_debug("total_intlv_bits=%u", map->total_intlv_bits); +} + +int get_address_map(struct addr_ctx *ctx) +{ + int ret; + + ret = get_address_map_common(ctx); + if (ret) + return ret; + + ret = get_global_map_data(ctx); + if (ret) + return ret; + + dump_address_map(&ctx->map); + + return ret; +} diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h new file mode 100644 index 000000000000..6aaa5093f42c --- /dev/null +++ b/drivers/ras/amd/atl/reg_fields.h @@ -0,0 +1,603 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * reg_fields.h : Register field definitions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +/* + * Notes on naming: + * 1) Use "DF_" prefix for fields that are the same for all revisions. 
+ * 2) Use "DFx_" prefix for fields that differ between revisions. + * a) "x" is the first major revision where the new field appears. + * b) E.g., if DF2 and DF3 have the same field, then call it DF2. + * c) E.g., if DF3p5 and DF4 have the same field, then call it DF4. + */ + +/* + * Coherent Station Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x50 [Fabric Block Instance Information 3] + * DF2 BlockFabricId [19:8] + * DF3 BlockFabricId [19:8] + * DF3p5 BlockFabricId [19:8] + * DF4 BlockFabricId [19:8] + * DF4p5 BlockFabricId [15:8] + */ +#define DF2_COH_ST_FABRIC_ID GENMASK(19, 8) +#define DF4p5_COH_ST_FABRIC_ID GENMASK(15, 8) + +/* + * Component ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 ComponentIdMask [9:0] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 ComponentIdMask [15:0] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 ComponentIdMask [15:0] + * DF4p5 ComponentIdMask [15:0] + */ +#define DF3_COMPONENT_ID_MASK GENMASK(9, 0) +#define DF4_COMPONENT_ID_MASK GENMASK(15, 0) + +/* + * Destination Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 DstFabricID [7:0] + * DF3 DstFabricID [9:0] + * DF3p5 DstFabricID [11:0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 DstFabricID [27:16] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 DstFabricID [23:16] + */ +#define DF2_DST_FABRIC_ID GENMASK(7, 0) +#define DF3_DST_FABRIC_ID GENMASK(9, 0) +#define DF3p5_DST_FABRIC_ID GENMASK(11, 0) +#define DF4_DST_FABRIC_ID GENMASK(27, 16) +#define DF4p5_DST_FABRIC_ID GENMASK(23, 16) + +/* + * Die ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdMask [15:8] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 DieIdMask [18:16] + * + * D18F1x158 [System Fabric ID
Mask 2] + * DF3p5 DieIdMask [15:0] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 DieIdMask [15:0] + * DF4p5 DieIdMask [15:0] + */ +#define DF2_DIE_ID_MASK GENMASK(15, 8) +#define DF3_DIE_ID_MASK GENMASK(18, 16) +#define DF4_DIE_ID_MASK GENMASK(15, 0) + +/* + * Die ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdShift [27:24] + * + * DF3 N/A + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF2_DIE_ID_SHIFT GENMASK(27, 24) + +/* + * DRAM Address Range Valid + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 AddrRngVal [0] + * DF3 AddrRngVal [0] + * DF3p5 AddrRngVal [0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 AddrRngVal [0] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 AddrRngVal [0] + */ +#define DF_ADDR_RANGE_VAL BIT(0) + +/* + * DRAM Base Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 DramBaseAddr [31:12] + * DF3 DramBaseAddr [31:12] + * DF3p5 DramBaseAddr [31:12] + * + * D18F7xE00 [DRAM Base Address] + * DF4 DramBaseAddr [27:0] + * + * D18F7x200 [DRAM Base Address] + * DF4p5 DramBaseAddr [27:0] + */ +#define DF2_BASE_ADDR GENMASK(31, 12) +#define DF4_BASE_ADDR GENMASK(27, 0) + +/* + * DRAM Hole Base + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F0x104 [DRAM Hole Control] + * DF2 DramHoleBase [31:24] + * DF3 DramHoleBase [31:24] + * DF3p5 DramHoleBase [31:24] + * + * D18F7x104 [DRAM Hole Control] + * DF4 DramHoleBase [31:24] + * DF4p5 DramHoleBase [31:24] + */ +#define DF_DRAM_HOLE_BASE_MASK GENMASK(31, 24) + +/* + * DRAM Limit Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 DramLimitAddr [31:12] + * DF3 DramLimitAddr [31:12] + * DF3p5 DramLimitAddr [31:12] + * + * D18F7xE04 [DRAM Limit Address] + * DF4 
DramLimitAddr [27:0] + * + * D18F7x204 [DRAM Limit Address] + * DF4p5 DramLimitAddr [27:0] + */ +#define DF2_DRAM_LIMIT_ADDR GENMASK(31, 12) +#define DF4_DRAM_LIMIT_ADDR GENMASK(27, 0) + +/* + * Hash Interleave Controls + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F0x3F8 [DF Global Control] + * DF3 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * DF3p5 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * D18F7xE08 [DRAM Address Control] + * DF4 HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 HashIntlvCtl4K [7] + * HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * HashIntlvCtl1T [15] + */ +#define DF3_HASH_CTL_64K BIT(20) +#define DF3_HASH_CTL_2M BIT(21) +#define DF3_HASH_CTL_1G BIT(22) +#define DF4_HASH_CTL_4K BIT(7) +#define DF4_HASH_CTL_64K BIT(8) +#define DF4_HASH_CTL_2M BIT(9) +#define DF4_HASH_CTL_1G BIT(10) +#define DF4_HASH_CTL_1T BIT(15) + +/* + * High Address Offset + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffset [31:20] + * DF3 HiAddrOffset [31:12] + * DF3p5 HiAddrOffset [31:12] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffset [24:1] + * DF4p5 HiAddrOffset [24:1] + */ +#define DF2_HI_ADDR_OFFSET GENMASK(31, 20) +#define DF3_HI_ADDR_OFFSET GENMASK(31, 12) +#define DF4_HI_ADDR_OFFSET GENMASK(24, 1) + +/* + * High Address Offset Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffsetEn [0] + * DF3 HiAddrOffsetEn [0] + * DF3p5 HiAddrOffsetEn [0] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffsetEn [0] + * DF4p5 HiAddrOffsetEn [0] + */ +#define DF_HI_ADDR_OFFSET_EN BIT(0) + +/* + * Interleave Address Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM 
Base Address] + * DF2 IntLvAddrSel [10:8] + * DF3 IntLvAddrSel [11:9] + * DF3p5 IntLvAddrSel [11:9] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvAddrSel [2:0] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvAddrSel [2:0] + */ +#define DF2_INTLV_ADDR_SEL GENMASK(10, 8) +#define DF3_INTLV_ADDR_SEL GENMASK(11, 9) +#define DF4_INTLV_ADDR_SEL GENMASK(2, 0) + +/* + * Interleave Number of Channels + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 IntLvNumChan [7:4] + * DF3 IntLvNumChan [5:2] + * DF3p5 IntLvNumChan [6:2] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumChan [8:4] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumChan [9:4] + */ +#define DF2_INTLV_NUM_CHAN GENMASK(7, 4) +#define DF3_INTLV_NUM_CHAN GENMASK(5, 2) +#define DF3p5_INTLV_NUM_CHAN GENMASK(6, 2) +#define DF4_INTLV_NUM_CHAN GENMASK(8, 4) +#define DF4p5_INTLV_NUM_CHAN GENMASK(9, 4) + +/* + * Interleave Number of Dies + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumDies [11:10] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumDies [7:6] + * DF3p5 IntLvNumDies [7] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumDies [13:12] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumDies [13:12] + */ +#define DF2_INTLV_NUM_DIES GENMASK(11, 10) +#define DF3_INTLV_NUM_DIES GENMASK(7, 6) +#define DF3p5_INTLV_NUM_DIES BIT(7) +#define DF4_INTLV_NUM_DIES GENMASK(13, 12) + +/* + * Interleave Number of Sockets + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumSockets [8] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumSockets [8] + * DF3p5 IntLvNumSockets [8] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumSockets [18] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumSockets [18] + */ +#define 
DF2_INTLV_NUM_SOCKETS BIT(8) +#define DF4_INTLV_NUM_SOCKETS BIT(18) + +/* + * Legacy MMIO Hole Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 LgcyMmioHoleEn [1] + * DF3 LgcyMmioHoleEn [1] + * DF3p5 LgcyMmioHoleEn [1] + * + * D18F7xE08 [DRAM Address Control] + * DF4 LgcyMmioHoleEn [1] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 LgcyMmioHoleEn [1] + */ +#define DF_LEGACY_MMIO_HOLE_EN BIT(1) + +/* + * Log2 Address 64K Space 0 + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F2x90 [Non-power-of-2 channel Configuration Register for COH_ST DRAM Address Maps] + * DF3 Log2Addr64KSpace0 [5:0] + * + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF_LOG2_ADDR_64K_SPACE0 GENMASK(5, 0) + +/* + * Major Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MajorRevision [27:24] + * DF4p5 MajorRevision [27:24] + */ +#define DF_MAJOR_REVISION GENMASK(27, 24) + +/* + * Minor Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MinorRevision [23:16] + * DF4p5 MinorRevision [23:16] + */ +#define DF_MINOR_REVISION GENMASK(23, 16) + +/* + * Node ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 NodeIdMask [25:16] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 NodeIdMask [31:16] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 NodeIdMask [31:16] + * DF4p5 NodeIdMask [31:16] + */ +#define DF3_NODE_ID_MASK GENMASK(25, 16) +#define DF4_NODE_ID_MASK GENMASK(31, 16) + +/* + * Node ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x20C [System Fabric ID 
Mask 1] + * DF3 NodeIdShift [3:0] + * + * D18F1x154 [System Fabric ID Mask 1] + * DF3p5 NodeIdShift [3:0] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 NodeIdShift [3:0] + * DF4p5 NodeIdShift [3:0] + */ +#define DF3_NODE_ID_SHIFT GENMASK(3, 0) + +/* + * Remap Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapEn [4] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapEn [4] + */ +#define DF4_REMAP_EN BIT(4) + +/* + * Remap Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapSel [7:5] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapSel [6:5] + */ +#define DF4_REMAP_SEL GENMASK(7, 5) +#define DF4p5_REMAP_SEL GENMASK(6, 5) + +/* + * Socket ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdMask [23:16] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdMask [26:24] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdMask [31:16] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 SocketIdMask [31:16] + * DF4p5 SocketIdMask [31:16] + */ +#define DF2_SOCKET_ID_MASK GENMASK(23, 16) +#define DF3_SOCKET_ID_MASK GENMASK(26, 24) +#define DF4_SOCKET_ID_MASK GENMASK(31, 16) + +/* + * Socket ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdShift [31:28] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdShift [9:8] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdShift [11:8] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 SocketIdShift [11:8] + * DF4p5 SocketIdShift [11:8] + */ +#define DF2_SOCKET_ID_SHIFT GENMASK(31, 28) +#define DF3_SOCKET_ID_SHIFT GENMASK(9, 8) +#define DF4_SOCKET_ID_SHIFT GENMASK(11, 
8) diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c new file mode 100644 index 000000000000..af61f2f1d6de --- /dev/null +++ b/drivers/ras/amd/atl/system.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * system.c : Functions to read and save system-wide data + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id) +{ + u16 socket_id_bits, die_id_bits; + + if (socket_id > 0 && df_cfg.socket_id_mask == 0) { + atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x", + socket_id, df_cfg.socket_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. */ + socket_id_bits = socket_id; + socket_id_bits <<= df_cfg.socket_id_shift; + socket_id_bits &= df_cfg.socket_id_mask; + + if (die_id > 0 && df_cfg.die_id_mask == 0) { + atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x", + die_id, df_cfg.die_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. 
*/ + die_id_bits = die_id; + die_id_bits <<= df_cfg.die_id_shift; + die_id_bits &= df_cfg.die_id_mask; + + ctx->node_id = (socket_id_bits | die_id_bits) >> df_cfg.node_id_shift; + return 0; +} + +static void df2_get_masks_shifts(u32 mask0) +{ + df_cfg.socket_id_shift = FIELD_GET(DF2_SOCKET_ID_SHIFT, mask0); + df_cfg.socket_id_mask = FIELD_GET(DF2_SOCKET_ID_MASK, mask0); + df_cfg.die_id_shift = FIELD_GET(DF2_DIE_ID_SHIFT, mask0); + df_cfg.die_id_mask = FIELD_GET(DF2_DIE_ID_MASK, mask0); + df_cfg.node_id_shift = df_cfg.die_id_shift; + df_cfg.node_id_mask = df_cfg.socket_id_mask | df_cfg.die_id_mask; + df_cfg.component_id_mask = ~df_cfg.node_id_mask; +} + +static void df3_get_masks_shifts(u32 mask0, u32 mask1) +{ + df_cfg.component_id_mask = FIELD_GET(DF3_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF3_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF3_SOCKET_ID_SHIFT, mask1); + df_cfg.socket_id_mask = FIELD_GET(DF3_SOCKET_ID_MASK, mask1); + df_cfg.die_id_mask = FIELD_GET(DF3_DIE_ID_MASK, mask1); +} + +static void df3p5_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df_cfg.component_id_mask = FIELD_GET(DF4_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF4_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF4_SOCKET_ID_SHIFT, mask1); + + df_cfg.socket_id_mask = FIELD_GET(DF4_SOCKET_ID_MASK, mask2); + df_cfg.die_id_mask = FIELD_GET(DF4_DIE_ID_MASK, mask2); +} + +static void df4_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df3p5_get_masks_shifts(mask0, mask1, mask2); + + if (!(df_cfg.flags.socket_id_shift_quirk && df_cfg.socket_id_shift == 1)) + return; + + df_cfg.socket_id_shift = 0; + df_cfg.socket_id_mask = 1; + df_cfg.die_id_shift = 0; + df_cfg.die_id_mask = 0; + df_cfg.node_id_shift = 8; + df_cfg.node_id_mask = 0x100; +} + +static int 
df4_get_fabric_id_mask_registers(void) +{ + u32 mask0, mask1, mask2; + + /* Read D18F4x1B0 (SystemFabricIdMask0) */ + if (df_indirect_read_broadcast(0, 4, 0x1B0, &mask0)) + return -EINVAL; + + /* Read D18F4x1B4 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 4, 0x1B4, &mask1)) + return -EINVAL; + + /* Read D18F4x1B8 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 4, 0x1B8, &mask2)) + return -EINVAL; + + df4_get_masks_shifts(mask0, mask1, mask2); + return 0; +} + +static int df4_determine_df_rev(u32 reg) +{ + df_cfg.rev = FIELD_GET(DF_MINOR_REVISION, reg) < 5 ? DF4 : DF4p5; + + /* Check for special cases or quirks based on Device/Vendor IDs.*/ + + /* Read D18F0x000 (DeviceVendorId0) */ + if (df_indirect_read_broadcast(0, 0, 0, &reg)) + return -EINVAL; + + if (reg == DF_FUNC0_ID_ZEN4_SERVER) + df_cfg.flags.socket_id_shift_quirk = 1; + + return df4_get_fabric_id_mask_registers(); +} + +static int determine_df_rev_legacy(void) +{ + u32 fabric_id_mask0, fabric_id_mask1, fabric_id_mask2; + + /* + * Check for DF3.5. + * + * Component ID Mask must be non-zero. Register D18F1x150 is + * reserved pre-DF3.5, so value will be Read-as-Zero. + */ + + /* Read D18F1x150 (SystemFabricIdMask0). */ + if (df_indirect_read_broadcast(0, 1, 0x150, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF4_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3p5; + + /* Read D18F1x154 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x154, &fabric_id_mask1)) + return -EINVAL; + + /* Read D18F1x158 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 1, 0x158, &fabric_id_mask2)) + return -EINVAL; + + df3p5_get_masks_shifts(fabric_id_mask0, fabric_id_mask1, fabric_id_mask2); + return 0; + } + + /* + * Check for DF3. + * + * Component ID Mask must be non-zero. Field is Read-as-Zero on DF2. + */ + + /* Read D18F1x208 (SystemFabricIdMask).
*/ + if (df_indirect_read_broadcast(0, 1, 0x208, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF3_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3; + + /* Read D18F1x20C (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x20C, &fabric_id_mask1)) + return -EINVAL; + + df3_get_masks_shifts(fabric_id_mask0, fabric_id_mask1); + return 0; + } + + /* Default to DF2. */ + df_cfg.rev = DF2; + df2_get_masks_shifts(fabric_id_mask0); + return 0; +} + +static int determine_df_rev(void) +{ + u32 reg; + u8 rev; + + if (df_cfg.rev != UNKNOWN) + return 0; + + /* Read D18F0x40 (FabricBlockInstanceCount). */ + if (df_indirect_read_broadcast(0, 0, 0x40, &reg)) + return -EINVAL; + + /* + * Revision fields added for DF4 and later. + * + * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. + */ + rev = FIELD_GET(DF_MAJOR_REVISION, reg); + if (!rev) + return determine_df_rev_legacy(); + + /* + * Fail out for major revisions other than '4'. + * + * Explicit support should be added for newer systems to avoid issues.
+ */ + if (rev == 4) + return df4_determine_df_rev(reg); + + return -EINVAL; +} + +static void get_num_maps(void) +{ + switch (df_cfg.rev) { + case DF2: + case DF3: + case DF3p5: + df_cfg.num_coh_st_maps = 2; + break; + case DF4: + case DF4p5: + df_cfg.num_coh_st_maps = 4; + break; + default: + atl_debug_on_bad_df_rev(); + } +} + +static void apply_node_id_shift(void) +{ + if (df_cfg.rev == DF2) + return; + + df_cfg.die_id_shift = df_cfg.node_id_shift; + df_cfg.die_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_shift += df_cfg.node_id_shift; +} + +static void dump_df_cfg(void) +{ + pr_debug("rev=0x%x", df_cfg.rev); + + pr_debug("component_id_mask=0x%x", df_cfg.component_id_mask); + pr_debug("die_id_mask=0x%x", df_cfg.die_id_mask); + pr_debug("node_id_mask=0x%x", df_cfg.node_id_mask); + pr_debug("socket_id_mask=0x%x", df_cfg.socket_id_mask); + + pr_debug("die_id_shift=0x%x", df_cfg.die_id_shift); + pr_debug("node_id_shift=0x%x", df_cfg.node_id_shift); + pr_debug("socket_id_shift=0x%x", df_cfg.socket_id_shift); + + pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps); + + pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa); + pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk); +} + +int get_df_system_info(void) +{ + if (determine_df_rev()) { + pr_warn("amd_atl: Failed to determine DF Revision"); + df_cfg.rev = UNKNOWN; + return -EINVAL; + } + + apply_node_id_shift(); + + get_num_maps(); + + dump_df_cfg(); + + return 0; +} diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c new file mode 100644 index 000000000000..9d51e4954687 --- /dev/null +++ b/drivers/ras/amd/atl/umc.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * umc.c : Unified Memory Controller (UMC) topology helpers + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +static u8 get_die_id(struct atl_err *err) +{ + /* + * For CPUs, this is the AMD Node ID modulo the number + * of AMD Nodes per socket. + */ + return topology_die_id(err->cpu) % amd_get_nodes_per_socket(); +} + +#define UMC_CHANNEL_NUM GENMASK(31, 20) +static u8 get_coh_st_inst_id(struct atl_err *err) +{ + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); +} + +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +{ + u8 socket_id = topology_physical_package_id(err->cpu); + u8 coh_st_inst_id = get_coh_st_inst_id(err); + unsigned long addr = err->addr; + u8 die_id = get_die_id(err); + + pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", + socket_id, die_id, coh_st_inst_id, addr); + + return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 95540ea8dd9d..a6e4792a1b2e 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -10,6 +10,37 @@ #include #include +#if IS_ENABLED(CONFIG_AMD_ATL) +/* + * Once set, this function pointer should never be unset. + * + * The library module will set this pointer if it successfully loads. The module + * should not be unloaded except for testing and debug purposes. 
+ */ +static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err); + +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)) +{ + amd_atl_umc_na_to_spa = f; +} +EXPORT_SYMBOL_GPL(amd_atl_register_decoder); + +void amd_atl_unregister_decoder(void) +{ + amd_atl_umc_na_to_spa = NULL; +} +EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder); + +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +{ + if (!amd_atl_umc_na_to_spa) + return -EINVAL; + + return amd_atl_umc_na_to_spa(err); +} +EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr); +#endif /* CONFIG_AMD_ATL */ + #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include diff --git a/include/linux/ras.h b/include/linux/ras.h index 1f4048bf2674..09c632832bf1 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); + #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -35,4 +36,19 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ -- cgit v1.2.3 From 6c9058f49084569d1d816e87185e0a4f9ab1a321 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 22 Jan 2024 22:14:00 -0600 Subject: EDAC/amd64: Use new AMD Address Translation Library Remove old address translation code and use the new AMD Address Translation Library. 
Use "imply" in Kconfig so that the "AMD_ATL" config option takes the value of "EDAC_AMD64" as its default. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240123041401.79812-3-yazen.ghannam@amd.com --- drivers/edac/Kconfig | 1 + drivers/edac/amd64_edac.c | 286 ++-------------------------------------------- 2 files changed, 10 insertions(+), 277 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 5a7f3fabee22..16c8de5050e5 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -78,6 +78,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE + imply AMD_ATL help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 537b9987a431..ca9a8641652d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include "amd64_edac.h" #include @@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m) return nid - gpu_node_map.base_node_id + 1; } -/* Protect the PCI config register pairs used for DF indirect access. */ -static DEFINE_MUTEX(df_indirect_mutex); - -/* - * Data Fabric Indirect Access uses FICAA/FICAD. - * - * Fabric Indirect Configuration Access Address (FICAA): Constructed based - * on the device's Instance Id and the PCI function and register offset of - * the desired register. - * - * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO - * and FICAD HI registers but so far we only need the LO register. - * - * Use Instance Id 0xFF to indicate a broadcast read. 
- */ -#define DF_BROADCAST 0xFF -static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - struct pci_dev *F4; - u32 ficaa; - int err = -ENODEV; - - if (node >= amd_nb_num()) - goto out; - - F4 = node_to_amd_nb(node)->link; - if (!F4) - goto out; - - ficaa = (instance_id == DF_BROADCAST) ? 0 : 1; - ficaa |= reg & 0x3FC; - ficaa |= (func & 0x7) << 11; - ficaa |= instance_id << 16; - - mutex_lock(&df_indirect_mutex); - - err = pci_write_config_dword(F4, 0x5C, ficaa); - if (err) { - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); - goto out_unlock; - } - - err = pci_read_config_dword(F4, 0x98, lo); - if (err) - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); - -out_unlock: - mutex_unlock(&df_indirect_mutex); - -out: - return err; -} - -static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - return __df_indirect_read(node, func, reg, instance_id, lo); -} - -static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) -{ - return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); -} - -struct addr_ctx { - u64 ret_addr; - u32 tmp; - u16 nid; - u8 inst_id; -}; - -static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) -{ - u64 dram_base_addr, dram_limit_addr, dram_hole_base; - - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; - u8 intlv_addr_sel, intlv_addr_bit; - u8 num_intlv_bits, hashed_bit; - u8 lgcy_mmio_hole_en, base = 0; - u8 cs_mask, cs_id = 0; - bool hash_enabled = false; - - struct addr_ctx ctx; - - memset(&ctx, 0, sizeof(ctx)); - - /* Start from the normalized address */ - ctx.ret_addr = norm_addr; - - ctx.nid = nid; - ctx.inst_id = umc; - - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. 
*/ - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) - goto out_err; - - /* Remove HiAddrOffset from normalized address, if enabled: */ - if (ctx.tmp & BIT(0)) { - u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; - - if (norm_addr >= hi_addr_offset) { - ctx.ret_addr -= hi_addr_offset; - base = 1; - } - } - - /* Read D18F0x110 (DramBaseAddress). */ - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - /* Check if address range is valid. */ - if (!(ctx.tmp & BIT(0))) { - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, ctx.tmp); - goto out_err; - } - - lgcy_mmio_hole_en = ctx.tmp & BIT(1); - intlv_num_chan = (ctx.tmp >> 4) & 0xF; - intlv_addr_sel = (ctx.tmp >> 8) & 0x7; - dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; - - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ - if (intlv_addr_sel > 3) { - pr_err("%s: Invalid interleave address select %d.\n", - __func__, intlv_addr_sel); - goto out_err; - } - - /* Read D18F0x114 (DramLimitAddress). 
*/ - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) - goto out_err; - - intlv_num_sockets = (ctx.tmp >> 8) & 0x1; - intlv_num_dies = (ctx.tmp >> 10) & 0x3; - dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); - - intlv_addr_bit = intlv_addr_sel + 8; - - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ - switch (intlv_num_chan) { - case 0: intlv_num_chan = 0; break; - case 1: intlv_num_chan = 1; break; - case 3: intlv_num_chan = 2; break; - case 5: intlv_num_chan = 3; break; - case 7: intlv_num_chan = 4; break; - - case 8: intlv_num_chan = 1; - hash_enabled = true; - break; - default: - pr_err("%s: Invalid number of interleaved channels %d.\n", - __func__, intlv_num_chan); - goto out_err; - } - - num_intlv_bits = intlv_num_chan; - - if (intlv_num_dies > 2) { - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", - __func__, intlv_num_dies); - goto out_err; - } - - num_intlv_bits += intlv_num_dies; - - /* Add a bit if sockets are interleaved. */ - num_intlv_bits += intlv_num_sockets; - - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { - pr_err("%s: Invalid interleave bits %d.\n", - __func__, num_intlv_bits); - goto out_err; - } - - if (num_intlv_bits > 0) { - u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; - - /* - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. - * This is the fabric id for this coherent slave. Use - * umc/channel# as instance id of the coherent slave - * for FICAA. - */ - if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) - goto out_err; - - cs_fabric_id = (ctx.tmp >> 8) & 0xFF; - die_id_bit = 0; - - /* If interleaved over more than 1 channel: */ - if (intlv_num_chan) { - die_id_bit = intlv_num_chan; - cs_mask = (1 << die_id_bit) - 1; - cs_id = cs_fabric_id & cs_mask; - } - - sock_id_bit = die_id_bit; - - /* Read D18F1x208 (SystemFabricIdMask). 
*/ - if (intlv_num_dies || intlv_num_sockets) - if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) - goto out_err; - - /* If interleaved over more than 1 die. */ - if (intlv_num_dies) { - sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (ctx.tmp >> 24) & 0xF; - die_id_mask = (ctx.tmp >> 8) & 0xFF; - - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; - } - - /* If interleaved over more than 1 socket. */ - if (intlv_num_sockets) { - socket_id_shift = (ctx.tmp >> 28) & 0xF; - socket_id_mask = (ctx.tmp >> 16) & 0xFF; - - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; - } - - /* - * The pre-interleaved address consists of XXXXXXIIIYYYYY - * where III is the ID for this CS, and XXXXXXYYYYY are the - * address bits from the post-interleaved address. - * "num_intlv_bits" has been calculated to tell us how many "I" - * bits there are. "intlv_addr_bit" tells us how many "Y" bits - * there are (where "I" starts). - */ - temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); - temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; - } - - /* Add dram base address */ - ctx.ret_addr += dram_base_addr; - - /* If legacy MMIO hole enabled */ - if (lgcy_mmio_hole_en) { - if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) - goto out_err; - - dram_hole_base = ctx.tmp & GENMASK(31, 24); - if (ctx.ret_addr >= dram_hole_base) - ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); - } - - if (hash_enabled) { - /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ctx.ret_addr >> 12) ^ - (ctx.ret_addr >> 18) ^ - (ctx.ret_addr >> 21) ^ - (ctx.ret_addr >> 30) ^ - cs_id; - - hashed_bit &= BIT(0); - - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) - ctx.ret_addr ^= BIT(intlv_addr_bit); - } - - /* Is calculated system address is above DRAM limit address? 
*/ - if (ctx.ret_addr > dram_limit_addr) - goto out_err; - - *sys_addr = ctx.ret_addr; - return 0; - -out_err: - return -EINVAL; -} - static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* @@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m) { u8 ecc_type = (m->status >> 45) & 0x3; struct mem_ctl_info *mci; + unsigned long sys_addr; struct amd64_pvt *pvt; + struct atl_err a_err; struct err_info err; - u64 sys_addr; node_id = fixup_node_id(node_id, m); @@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m) pvt->ops->get_err_info(m, &err); - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + a_err.addr = m->addr; + a_err.ipid = m->ipid; + a_err.cpu = m->extcpu; + + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(sys_addr)) { err.err_code = ERR_NORM_ADDR; goto log_error; } -- cgit v1.2.3 From a0c446dc4d9365a24d81f2ee024bdde46e40365f Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 22 Jan 2024 16:57:12 +0800 Subject: irqchip/gic-v3: Use readl_relaxed_poll_timeout_atomic() Replace the open coded register polling loop with readl_relaxed_poll_timeout_atomic() which provides the same functionality. Signed-off-by: Dawei Li Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240122085716.2999875-2-dawei.li@shingroup.cn --- drivers/irqchip/irq-gic-v3.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 98b0329b7154..65cbf378eec4 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -251,17 +252,13 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { - u32 count = 1000000; /* 1s! 
*/ + u32 val; + int ret; - while (readl_relaxed(base + GICD_CTLR) & bit) { - count--; - if (!count) { - pr_err_ratelimited("RWP timeout, gone fishing\n"); - return; - } - cpu_relax(); - udelay(1); - } + ret = readl_relaxed_poll_timeout_atomic(base + GICD_CTLR, val, !(val & bit), + 1, USEC_PER_SEC); + if (ret == -ETIMEDOUT) + pr_err_ratelimited("RWP timeout, gone fishing\n"); } /* Wait for completion of a distributor change */ @@ -279,8 +276,8 @@ static void gic_redist_wait_for_rwp(void) static void gic_enable_redist(bool enable) { void __iomem *rbase; - u32 count = 1000000; /* 1s! */ u32 val; + int ret; if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996) return; @@ -301,16 +298,13 @@ static void gic_enable_redist(bool enable) return; /* No PM support in this redistributor */ } - while (--count) { - val = readl_relaxed(rbase + GICR_WAKER); - if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) - break; - cpu_relax(); - udelay(1); - } - if (!count) + ret = readl_relaxed_poll_timeout_atomic(rbase + GICR_WAKER, val, + enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep), + 1, USEC_PER_SEC); + if (ret == -ETIMEDOUT) { pr_err_ratelimited("redistributor failed to %s...\n", enable ? "wakeup" : "sleep"); + } } /* -- cgit v1.2.3 From d22083a5f09b2066728a91f3abb71284451247b1 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 22 Jan 2024 16:57:13 +0800 Subject: irqchip/gic(v3): Replace gic_irq() with irqd_to_hwirq() GIC & GIC-v3 share same gic_irq() implementations, both of which serve exact same purpose as irqd_to_hwirq(). irqd_to_hwirq() is a generic and top level API of the interrupt subsystem, it's independent of any chip implementation. Replace gic_irq() with irqd_to_hwirq() and convert struct irq_data::hwirq to irq_hw_number_t explicitly. 
Suggested-by: Marc Zyngier Signed-off-by: Dawei Li Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240122085716.2999875-3-dawei.li@shingroup.cn --- drivers/irqchip/irq-gic-v3.c | 19 +++++++------------ drivers/irqchip/irq-gic.c | 27 ++++++++++++--------------- include/linux/irq.h | 2 +- 3 files changed, 20 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 65cbf378eec4..20a75f0353cd 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -181,11 +181,6 @@ static enum gic_intid_range get_intid_range(struct irq_data *d) return __get_intid_range(d->hwirq); } -static inline unsigned int gic_irq(struct irq_data *d) -{ - return d->hwirq; -} - static inline bool gic_irq_in_rdist(struct irq_data *d) { switch (get_intid_range(d)) { @@ -542,7 +537,7 @@ static int gic_irq_nmi_setup(struct irq_data *d) * A secondary irq_chip should be in charge of LPI request, * it should not be possible to get there */ - if (WARN_ON(gic_irq(d) >= 8192)) + if (WARN_ON(irqd_to_hwirq(d) >= 8192)) return -EINVAL; /* desc lock should already be held */ @@ -582,7 +577,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d) * A secondary irq_chip should be in charge of LPI request, * it should not be possible to get there */ - if (WARN_ON(gic_irq(d) >= 8192)) + if (WARN_ON(irqd_to_hwirq(d) >= 8192)) return; /* desc lock should already be held */ @@ -620,7 +615,7 @@ static bool gic_arm64_erratum_2941627_needed(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + write_gicreg(irqd_to_hwirq(d), ICC_EOIR1_EL1); isb(); if (gic_arm64_erratum_2941627_needed(d)) { @@ -640,19 +635,19 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d) * No need to deactivate an LPI, or an interrupt that * is is getting forwarded to a vcpu. 
*/ - if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) + if (irqd_to_hwirq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) return; if (!gic_arm64_erratum_2941627_needed(d)) - gic_write_dir(gic_irq(d)); + gic_write_dir(irqd_to_hwirq(d)); else gic_poke_irq(d, GICD_ICACTIVER); } static int gic_set_type(struct irq_data *d, unsigned int type) { + irq_hw_number_t irq = irqd_to_hwirq(d); enum gic_intid_range range; - unsigned int irq = gic_irq(d); void __iomem *base; u32 offset, index; int ret; @@ -678,7 +673,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) ret = gic_configure_irq(index, type, base + offset, NULL); if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) { /* Misconfigured PPIs are usually not fatal */ - pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq); + pr_warn("GIC: PPI INTID%ld is secure or misconfigured\n", irq); ret = 0; } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 412196a7dad5..98aa383e39db 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -162,11 +162,6 @@ static inline void __iomem *gic_cpu_base(struct irq_data *d) return gic_data_cpu_base(gic_data); } -static inline unsigned int gic_irq(struct irq_data *d) -{ - return d->hwirq; -} - static inline bool cascading_gic_irq(struct irq_data *d) { void *data = irq_data_get_irq_handler_data(d); @@ -183,14 +178,16 @@ static inline bool cascading_gic_irq(struct irq_data *d) */ static void gic_poke_irq(struct irq_data *d, u32 offset) { - u32 mask = 1 << (gic_irq(d) % 32); - writel_relaxed(mask, gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4); + u32 mask = 1 << (irqd_to_hwirq(d) % 32); + + writel_relaxed(mask, gic_dist_base(d) + offset + (irqd_to_hwirq(d) / 32) * 4); } static int gic_peek_irq(struct irq_data *d, u32 offset) { - u32 mask = 1 << (gic_irq(d) % 32); - return !!(readl_relaxed(gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4) & mask); + u32 mask = 1 << (irqd_to_hwirq(d) % 32); + + return 
!!(readl_relaxed(gic_dist_base(d) + offset + (irqd_to_hwirq(d) / 32) * 4) & mask); } static void gic_mask_irq(struct irq_data *d) @@ -220,7 +217,7 @@ static void gic_unmask_irq(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - u32 hwirq = gic_irq(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); if (hwirq < 16) hwirq = this_cpu_read(sgi_intid); @@ -230,7 +227,7 @@ static void gic_eoi_irq(struct irq_data *d) static void gic_eoimode1_eoi_irq(struct irq_data *d) { - u32 hwirq = gic_irq(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); /* Do not deactivate an IRQ forwarded to a vcpu. */ if (irqd_is_forwarded_to_vcpu(d)) @@ -293,8 +290,8 @@ static int gic_irq_get_irqchip_state(struct irq_data *d, static int gic_set_type(struct irq_data *d, unsigned int type) { + irq_hw_number_t gicirq = irqd_to_hwirq(d); void __iomem *base = gic_dist_base(d); - unsigned int gicirq = gic_irq(d); int ret; /* Interrupt configuration for SGIs can't be changed */ @@ -309,7 +306,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) ret = gic_configure_irq(gicirq, type, base + GIC_DIST_CONFIG, NULL); if (ret && gicirq < 32) { /* Misconfigured PPIs are usually not fatal */ - pr_warn("GIC: PPI%d is secure or misconfigured\n", gicirq - 16); + pr_warn("GIC: PPI%ld is secure or misconfigured\n", gicirq - 16); ret = 0; } @@ -319,7 +316,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { /* Only interrupts on the primary GIC can be forwarded to a vcpu. 
*/ - if (cascading_gic_irq(d) || gic_irq(d) < 16) + if (cascading_gic_irq(d) || irqd_to_hwirq(d) < 16) return -EINVAL; if (vcpu) @@ -796,7 +793,7 @@ static void rmw_writeb(u8 bval, void __iomem *addr) static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { - void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d); + void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + irqd_to_hwirq(d); struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); unsigned int cpu; diff --git a/include/linux/irq.h b/include/linux/irq.h index 90081afa10ce..97baa937ab5b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -179,7 +179,7 @@ struct irq_common_data { struct irq_data { u32 mask; unsigned int irq; - unsigned long hwirq; + irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; -- cgit v1.2.3 From 453f0ae797328e675840466c80e5b268d7feb9ba Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Sun, 28 Jan 2024 09:59:50 -0600 Subject: RAS/AMD/ATL: Add MI300 support AMD MI300 systems include on-die HBM3 memory and a unique topology. And they fall under Data Fabric version 4.5 in overall design. Generally, topology information (IDs, etc.) is gathered from Data Fabric registers. However, the unique topology for MI300 means that some topology information is fixed in hardware and follows arbitrary mappings. Furthermore, not all hardware instances are software-visible, so register accesses must be adjusted. Recognize and add helper functions for the new MI300 interleave modes. Add lookup tables for fixed values where appropriate. Adjust how Die and Node IDs are found and used. Also, fix some register bitmasks that were mislabeled. 
Signed-off-by: Muralidhara M K Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240128155950.1434067-1-yazen.ghannam@amd.com --- drivers/ras/amd/atl/access.c | 27 ++++++++++ drivers/ras/amd/atl/dehash.c | 95 ++++++++++++++++++++++++++++++++++- drivers/ras/amd/atl/denormalize.c | 102 ++++++++++++++++++++++++++++++++++++++ drivers/ras/amd/atl/internal.h | 10 +++- drivers/ras/amd/atl/map.c | 17 +++++++ drivers/ras/amd/atl/reg_fields.h | 9 ++-- drivers/ras/amd/atl/system.c | 3 ++ drivers/ras/amd/atl/umc.c | 51 +++++++++++++++++++ 8 files changed, 309 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c index f6dd87bb2c35..ee4661ed28ba 100644 --- a/drivers/ras/amd/atl/access.c +++ b/drivers/ras/amd/atl/access.c @@ -36,6 +36,32 @@ static DEFINE_MUTEX(df_indirect_mutex); #define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) +static u16 get_accessible_node(u16 node) +{ + /* + * On heterogeneous systems, not all AMD Nodes are accessible + * through software-visible registers. The Node ID needs to be + * adjusted for register accesses. But its value should not be + * changed for the translation methods. + */ + if (df_cfg.flags.heterogeneous) { + /* Only Node 0 is accessible on DF3.5 systems. */ + if (df_cfg.rev == DF3p5) + node = 0; + + /* + * Only the first Node in each Socket is accessible on + * DF4.5 systems, and this is visible to software as one + * Fabric per Socket. The Socket ID can be derived from + * the Node ID and global shift values. 
+ */ + if (df_cfg.rev == DF4p5) + node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift; + } + + return node; +} + static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) { u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; @@ -43,6 +69,7 @@ static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *l int err = -ENODEV; u32 ficaa = 0; + node = get_accessible_node(node); if (node >= amd_nb_num()) goto out; diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c index 6f414926e6fe..4ea46262c4f5 100644 --- a/drivers/ras/amd/atl/dehash.c +++ b/drivers/ras/amd/atl/dehash.c @@ -253,7 +253,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); - hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); /* * Generate a unique address to determine which bits @@ -343,6 +343,94 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) return 0; } +/* + * MI300 hash bits + * 4K 64K 2M 1G 1T 1T + * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43} + * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44} + * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45} + * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46} + * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack + * DieID[0] = XOR of addr{12, 20, 27, 34, 41 } + * DieID[1] = XOR of addr{13, 21, 28, 35, 42 } + */ +static int mi300_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hashed_bit, intlv_bit, test_bit; + u8 num_intlv_bits, base_bit, i; + + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + return -EINVAL; + + hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = 
FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* Channel bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 8 + i; + + /* COH_ST_Select[4] jumps to a base bit of 14. */ + if (i == 4) + base_bit = 14; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + /* 4k hash bit only applies to the first 3 bits. */ + if (i <= 2) { + test_bit = BIT_ULL(12 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_4k; + } + + /* Use temporary 'test_bit' value to avoid Sparse warnings. */ + test_bit = BIT_ULL(15 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(22 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(29 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(36 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + test_bit = BIT_ULL(43 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + /* Die bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_dies); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 12 + i; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + test_bit = BIT_ULL(20 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(27 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(34 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(41 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + return 0; +} + int dehash_address(struct addr_ctx *ctx) { switch (ctx->map.intlv_mode) { @@ -400,6 +488,11 @@ int dehash_address(struct addr_ctx *ctx) case 
DF4p5_NPS1_16CHAN_2K_HASH: return df4p5_dehash_addr(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return mi300_dehash_addr(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return -EINVAL; diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index 01f1d0fb6799..d5d0e1fda159 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -80,6 +80,40 @@ static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); } +/* + * Make space for CS ID at bits [14:8] as follows: + * + * 8 channels -> bits [10:8] + * 16 channels -> bits [11:8] + * 32 channels -> bits [14,11:8] + * + * 1 die -> N/A + * 2 dies -> bit [12] + * 4 dies -> bits [13:12] + */ +static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u64 denorm_addr; + + if (ctx->map.intlv_bit_pos != 8) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return ~0ULL; + } + + /* Channel bits. Covers up to 4 bits at [11:8]. */ + denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); + + /* Die bits. Always starts at [12]. */ + denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); + + /* Additional channel bit at [14]. */ + if (num_intlv_bits > 4) + denorm_addr = expand_bits(14, 1, denorm_addr); + + return denorm_addr; +} + /* * Take the current calculated address and shift enough bits in the middle * to make a gap where the interleave bits will be inserted. 
@@ -107,6 +141,12 @@ static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_8CHAN_2K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return make_space_for_coh_st_id_split_2_1(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return make_space_for_coh_st_id_mi300(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return ~0ULL; @@ -204,6 +244,32 @@ static u16 get_coh_st_id_df4(struct addr_ctx *ctx) return coh_st_id; } +/* + * MI300 hash has: + * (C)hannel[3:0] = coh_st_id[3:0] + * (S)tack[0] = coh_st_id[4] + * (D)ie[1:0] = coh_st_id[6:5] + * + * Hashed coh_st_id is swizzled so that Stack bit is at the end. + * coh_st_id = SDDCCCC + */ +static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 channel_bits, die_bits, stack_bit; + u16 die_id; + + /* Subtract the "base" Destination Fabric ID. */ + ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); + + die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; + + channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); + stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; + die_bits = die_id << 4; + + return stack_bit | die_bits | channel_bits; +} + /* * Derive the correct Coherent Station ID that represents the interleave bits * used within the system physical address. This accounts for the @@ -237,6 +303,11 @@ static u16 calculate_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_16CHAN_2K_HASH: return get_coh_st_id_df4(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return get_coh_st_id_mi300(ctx); + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. 
*/ case DF4p5_NPS2_4CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_1K_HASH: @@ -287,6 +358,9 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id case NOHASH_8CHAN: case NOHASH_16CHAN: case NOHASH_32CHAN: + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: case DF2_2CHAN_HASH: return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); @@ -314,6 +388,31 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id } } +/* + * MI300 systems have a fixed, hardware-defined physical-to-logical + * Coherent Station mapping. The Remap registers are not used. + */ +static const u16 phy_to_log_coh_st_map_mi300[] = { + 12, 13, 14, 15, + 8, 9, 10, 11, + 4, 5, 6, 7, + 0, 1, 2, 3, + 28, 29, 30, 31, + 24, 25, 26, 27, + 20, 21, 22, 23, + 16, 17, 18, 19, +}; + +static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) +{ + if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { + atl_debug(ctx, "Instance ID out of range"); + return ~0; + } + + return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); +} + static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) { u16 component_id, log_fabric_id; @@ -321,6 +420,9 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) /* Start with the physical COH_ST Fabric ID. */ u16 phys_fabric_id = ctx->coh_st_fabric_id; + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_logical_coh_st_fabric_id_mi300(ctx); + /* Skip logical ID lookup if remapping is disabled. */ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && ctx->map.intlv_mode != DF3_6CHAN) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 13f1b6098c96..21d45755e5f2 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -27,8 +27,12 @@ /* PCI ID for Zen4 Server DF Function 0. */ #define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 +/* PCI IDs for MI300 DF Function 0. 
*/ +#define DF_FUNC0_ID_MI300 0x15281022 + /* Shift needed for adjusting register values to true values. */ #define DF_DRAM_BASE_LIMIT_LSB 28 +#define MI300_DRAM_LIMIT_LSB 20 enum df_revisions { UNKNOWN, @@ -59,6 +63,9 @@ enum intlv_modes { DF4_NPS1_12CHAN_HASH = 0x15, DF4_NPS2_5CHAN_HASH = 0x16, DF4_NPS1_10CHAN_HASH = 0x17, + MI3_HASH_8CHAN = 0x18, + MI3_HASH_16CHAN = 0x19, + MI3_HASH_32CHAN = 0x1A, DF2_2CHAN_HASH = 0x21, /* DF4.5 modes are all IntLvNumChan + 0x20 */ DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, @@ -86,7 +93,8 @@ enum intlv_modes { struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, - __reserved_0 : 6; + heterogeneous : 1, + __reserved_0 : 5; }; struct df_config { diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 33f549b6255a..8b908e8d7495 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -63,6 +63,10 @@ static int df4p5_get_intlv_mode(struct addr_ctx *ctx) if (ctx->map.intlv_mode <= NOHASH_32CHAN) return 0; + if (ctx->map.intlv_mode >= MI3_HASH_8CHAN && + ctx->map.intlv_mode <= MI3_HASH_32CHAN) + return 0; + /* * Modes matching the ranges above are returned as-is. * @@ -125,6 +129,9 @@ static u64 get_hi_addr_offset(u32 reg_dram_offset) atl_debug_on_bad_df_rev(); } + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + shift = MI300_DRAM_LIMIT_LSB; + return hi_addr_offset << shift; } @@ -369,6 +376,13 @@ static int get_coh_st_fabric_id(struct addr_ctx *ctx) { u32 reg; + /* + * On MI300 systems, the Coherent Station Fabric ID is derived + * later. And it does not depend on the register value. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return 0; + /* Read D18F0x50 (FabricBlockInstanceInformation3). 
*/ if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, &reg)) return -EINVAL; @@ -490,6 +504,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case NOHASH_8CHAN: case DF3_COD1_8CHAN_HASH: case DF4_NPS1_8CHAN_HASH: + case MI3_HASH_8CHAN: case DF4p5_NPS1_8CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_2K_HASH: return 8; @@ -502,6 +517,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS1_12CHAN_2K_HASH: return 12; case NOHASH_16CHAN: + case MI3_HASH_16CHAN: case DF4p5_NPS1_16CHAN_1K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return 16; @@ -509,6 +525,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS0_24CHAN_2K_HASH: return 24; case NOHASH_32CHAN: + case MI3_HASH_32CHAN: return 32; default: atl_debug_on_bad_intlv_mode(ctx); diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h index 6aaa5093f42c..9dcdf6e4a856 100644 --- a/drivers/ras/amd/atl/reg_fields.h +++ b/drivers/ras/amd/atl/reg_fields.h @@ -246,11 +246,11 @@ #define DF3_HASH_CTL_64K BIT(20) #define DF3_HASH_CTL_2M BIT(21) #define DF3_HASH_CTL_1G BIT(22) -#define DF4_HASH_CTL_4K BIT(7) #define DF4_HASH_CTL_64K BIT(8) #define DF4_HASH_CTL_2M BIT(9) #define DF4_HASH_CTL_1G BIT(10) -#define DF4_HASH_CTL_1T BIT(15) +#define DF4p5_HASH_CTL_4K BIT(7) +#define DF4p5_HASH_CTL_1T BIT(15) /* * High Address Offset @@ -268,10 +268,13 @@ * D18F7x140 [DRAM Offset] * DF4 HiAddrOffset [24:1] * DF4p5 HiAddrOffset [24:1] + * MI300 HiAddrOffset [31:1] */ #define DF2_HI_ADDR_OFFSET GENMASK(31, 20) #define DF3_HI_ADDR_OFFSET GENMASK(31, 12) -#define DF4_HI_ADDR_OFFSET GENMASK(24, 1) + +/* Follow reference code by including reserved bits for simplicity.
*/ +#define DF4_HI_ADDR_OFFSET GENMASK(31, 1) /* * High Address Offset Enable diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index af61f2f1d6de..46493ed405d6 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -124,6 +124,9 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_ZEN4_SERVER) df_cfg.flags.socket_id_shift_quirk = 1; + if (reg == DF_FUNC0_ID_MI300) + df_cfg.flags.heterogeneous = 1; + return df4_get_fabric_id_mask_registers(); } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 9d51e4954687..7bfa21a582f0 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -12,8 +12,56 @@ #include "internal.h" +/* + * MI300 has a fixed, model-specific mapping between a UMC instance and + * its related Data Fabric Coherent Station instance. + * + * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the + * UMC instance within a Node. Use this to find the appropriate Coherent + * Station ID. + * + * Redundant bits were removed from the map below. + */ +static const u16 umc_coh_st_map[32] = { + 0x393, 0x293, 0x193, 0x093, + 0x392, 0x292, 0x192, 0x092, + 0x391, 0x291, 0x191, 0x091, + 0x390, 0x290, 0x190, 0x090, + 0x793, 0x693, 0x593, 0x493, + 0x792, 0x692, 0x592, 0x492, + 0x791, 0x691, 0x591, 0x491, + 0x790, 0x690, 0x590, 0x490, +}; + +#define UMC_ID_MI300 GENMASK(23, 12) +static u8 get_coh_st_inst_id_mi300(struct atl_err *err) +{ + u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid); + u8 i; + + for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) { + if (umc_id == umc_coh_st_map[i]) + break; + } + + WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map)); + + return i; +} + +#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) static u8 get_die_id(struct atl_err *err) { + /* + * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this + * needs to be divided by 4 to get the internal Die ID. 
+ */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) { + u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid); + + return node_id >> 2; + } + /* * For CPUs, this is the AMD Node ID modulo the number * of AMD Nodes per socket. @@ -24,6 +72,9 @@ static u8 get_die_id(struct atl_err *err) #define UMC_CHANNEL_NUM GENMASK(31, 20) static u8 get_coh_st_inst_id(struct atl_err *err) { + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_coh_st_inst_id_mi300(err); + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); } -- cgit v1.2.3 From a7b57372e1c5c848cbe9169574f07a9ee2177a1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 31 Jan 2024 11:24:25 +0300 Subject: RAS/AMD/ATL: Fix array overflow in get_logical_coh_st_fabric_id_mi300() Check against ARRAY_SIZE() which is the number of elements instead of sizeof() which is the number of bytes. Fixes: 453f0ae79732 ("RAS/AMD/ATL: Add MI300 support") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/279c8b5e-6c00-467a-9071-9c67926abea4@moroto.mountain --- drivers/ras/amd/atl/denormalize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index d5d0e1fda159..49a900e066f1 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -405,7 +405,7 @@ static const u16 phy_to_log_coh_st_map_mi300[] = { static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) { - if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { + if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) { atl_debug(ctx, "Instance ID out of range"); return ~0; } -- cgit v1.2.3 From 8f4a29b0e8a40d865040800684d7ff4141c1394f Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Dec 2023 02:50:16 -0800 Subject: x86/traps: Add sysvec_install() to install a system interrupt handler Add sysvec_install() to install a system interrupt handler into the 
IDT or the FRED system interrupt handler table. Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-28-xin3.li@intel.com --- arch/x86/entry/entry_fred.c | 14 ++++++++++++++ arch/x86/include/asm/desc.h | 2 -- arch/x86/include/asm/idtentry.h | 15 +++++++++++++++ arch/x86/kernel/cpu/acrn.c | 4 ++-- arch/x86/kernel/cpu/mshyperv.c | 15 +++++++-------- arch/x86/kernel/idt.c | 4 ++-- arch/x86/kernel/kvm.c | 2 +- drivers/xen/events/events_base.c | 2 +- 8 files changed, 42 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index 125b62311b31..3be0269bc0d4 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -119,6 +119,20 @@ static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = { SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), }; +static bool fred_setup_done __initdata; + +void __init fred_install_sysvec(unsigned int sysvec, idtentry_t handler) +{ + if (WARN_ON_ONCE(sysvec < FIRST_SYSTEM_VECTOR)) + return; + + if (WARN_ON_ONCE(fred_setup_done)) + return; + + if (!WARN_ON_ONCE(sysvec_table[sysvec - FIRST_SYSTEM_VECTOR])) + sysvec_table[sysvec - FIRST_SYSTEM_VECTOR] = handler; +} + static noinstr void fred_extint(struct pt_regs *regs) { unsigned int vector = regs->fred_ss.vector; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ab97b22ac04a..ec95fe44fa3a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -402,8 +402,6 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -void alloc_intr_gate(unsigned int n, const void *addr); - static inline void init_idt_data(struct idt_data *data, unsigned int n, const void *addr) { diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 
570f286ca7dd..47d4c04d103d 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -459,6 +459,21 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW #endif +void idt_install_sysvec(unsigned int n, const void *function); + +#ifdef CONFIG_X86_FRED +void fred_install_sysvec(unsigned int vector, const idtentry_t function); +#else +static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { } +#endif + +#define sysvec_install(vector, function) { \ + if (cpu_feature_enabled(X86_FEATURE_FRED)) \ + fred_install_sysvec(vector, function); \ + else \ + idt_install_sysvec(vector, asm_##function); \ +} + #else /* !__ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index bfeb18fad63f..2c5b51aad91a 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -26,8 +26,8 @@ static u32 __init acrn_detect(void) static void __init acrn_init_platform(void) { - /* Setup the IDT for ACRN hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback); + /* Install system interrupt handler for ACRN hypervisor callback */ + sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); x86_platform.calibrate_tsc = acrn_get_tsc_khz; x86_platform.calibrate_cpu = acrn_get_tsc_khz; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 01fa06dd06b6..45e0e70e238c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -539,19 +539,18 @@ static void __init ms_hyperv_init_platform(void) */ x86_platform.apic_post_init = hyperv_init; hyperv_setup_mmu_ops(); - /* Setup the IDT for hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback); - /* Setup the IDT for reenlightenment notifications */ + /* Install system interrupt handler for hypervisor callback */ + sysvec_install(HYPERVISOR_CALLBACK_VECTOR, 
sysvec_hyperv_callback); + + /* Install system interrupt handler for reenlightenment notifications */ if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) { - alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, - asm_sysvec_hyperv_reenlightenment); + sysvec_install(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); } - /* Setup the IDT for stimer0 */ + /* Install system interrupt handler for stimer0 */ if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) { - alloc_intr_gate(HYPERV_STIMER0_VECTOR, - asm_sysvec_hyperv_stimer0); + sysvec_install(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); } # ifdef CONFIG_SMP diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 660b601f1d6c..0cd53fa8c65d 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -337,7 +337,7 @@ void idt_invalidate(void) load_idt(&idt); } -void __init alloc_intr_gate(unsigned int n, const void *addr) +void __init idt_install_sysvec(unsigned int n, const void *function) { if (WARN_ON(n < FIRST_SYSTEM_VECTOR)) return; @@ -346,5 +346,5 @@ void __init alloc_intr_gate(unsigned int n, const void *addr) return; if (!WARN_ON(test_and_set_bit(n, system_vectors))) - set_intr_gate(n, addr); + set_intr_gate(n, function); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dfe9945b9bec..b05557918ae2 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -829,7 +829,7 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) { static_branch_enable(&kvm_async_pf_enabled); - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt); + sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt); } #ifdef CONFIG_SMP diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b8cfea7812d6..e2813bac92d4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -2216,7 +2216,7 @@ static __init void 
xen_alloc_callback_vector(void) return; pr_info("Xen HVM callback vector for event delivery is enabled\n"); - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback); + sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); } #else void xen_setup_callback_vector(void) {} -- cgit v1.2.3 From 87a61237530769d5a7a750fbc747ac0d1b2e18c1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 31 Jan 2024 10:57:32 -0600 Subject: RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support Zen-based AMD systems report DRAM ECC errors through Unified Memory Controller (UMC) MCA banks. The value provided in MCA_ADDR is a "normalized" address which represents the UMC's view of its managed memory. The normalized address must be translated to a system physical address for software to take action. MI300 systems, uniquely, do not provide a normalized address in MCA_ADDR for DRAM ECC errors. Rather, the "DRAM" address is reported. This value includes identifiers for the bank, row, column, pseudochannel and stack of the memory location. The DRAM address must be converted to a normalized address in order to be further translated to a system physical address. Add helper functions to do the DRAM to normalized translation for MI300 systems. The method is based on the fixed hardware layout of the on-chip memory. [ bp: Massage commit message, decapitalize some, rename function. 
] Signed-off-by: Yazen Ghannam Co-developed-by: Muralidhara M K Signed-off-by: Muralidhara M K Signed-off-by: Borislav Petkov (AMD) Tested-by: Muralidhara M K Link: https://lore.kernel.org/r/20240131165732.88297-1-yazen.ghannam@amd.com --- drivers/ras/amd/atl/internal.h | 1 + drivers/ras/amd/atl/system.c | 6 +- drivers/ras/amd/atl/umc.c | 200 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 205 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 21d45755e5f2..5de69e0bb0f9 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -224,6 +224,7 @@ int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); int get_df_system_info(void); int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); +int get_addr_hash_mi300(void); int get_address_map(struct addr_ctx *ctx); diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 46493ed405d6..701349e84942 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -124,9 +124,13 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_ZEN4_SERVER) df_cfg.flags.socket_id_shift_quirk = 1; - if (reg == DF_FUNC0_ID_MI300) + if (reg == DF_FUNC0_ID_MI300) { df_cfg.flags.heterogeneous = 1; + if (get_addr_hash_mi300()) + return -EINVAL; + } + return df4_get_fabric_id_mask_registers(); } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 7bfa21a582f0..7e310d1dfcfc 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -49,6 +49,204 @@ static u8 get_coh_st_inst_id_mi300(struct atl_err *err) return i; } +/* XOR the bits in @val. 
*/ +static u16 bitwise_xor_bits(u16 val) +{ + u16 tmp = 0; + u8 i; + + for (i = 0; i < 16; i++) + tmp ^= (val >> i) & 0x1; + + return tmp; +} + +struct xor_bits { + bool xor_enable; + u16 col_xor; + u32 row_xor; +}; + +#define NUM_BANK_BITS 4 + +static struct { + /* UMC::CH::AddrHashBank */ + struct xor_bits bank[NUM_BANK_BITS]; + + /* UMC::CH::AddrHashPC */ + struct xor_bits pc; + + /* UMC::CH::AddrHashPC2 */ + u8 bank_xor; +} addr_hash; + +#define MI300_UMC_CH_BASE 0x90000 +#define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) +#define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) +#define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) + +#define ADDR_HASH_XOR_EN BIT(0) +#define ADDR_HASH_COL_XOR GENMASK(13, 1) +#define ADDR_HASH_ROW_XOR GENMASK(31, 14) +#define ADDR_HASH_BANK_XOR GENMASK(5, 0) + +/* + * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used + * for hashing. Do this during module init, since the values will not + * change during run time. + * + * These registers are instantiated for each UMC across each AMD Node. + * However, they should be identically programmed due to the fixed hardware + * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a + * single global structure for simplicity. 
+ */ +int get_addr_hash_mi300(void) +{ + u32 temp; + int ret; + u8 i; + + for (i = 0; i < NUM_BANK_BITS; i++) { + ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp); + if (ret) + return ret; + + addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.bank[i].col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.bank[i].row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + } + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp); + if (ret) + return ret; + + addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.pc.col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.pc.row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp); + if (ret) + return ret; + + addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + + return 0; +} + +/* + * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must + * be converted to the intermediate normalized address (NA) before translating to a + * system physical address. + * + * The DRAM address includes bank, row, and column. Also included are bits for + * pseudochannel (PC) and stack ID (SID). + * + * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero + * + * The MCA address format is as follows: + * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} + * + * The normalized address format is fixed in hardware and is as follows: + * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} + * + * Additionally, the PC and Bank bits may be hashed. This must be accounted for before + * reconstructing the normalized address. 
+ */ +#define MI300_UMC_MCA_COL GENMASK(5, 1) +#define MI300_UMC_MCA_BANK GENMASK(9, 6) +#define MI300_UMC_MCA_ROW GENMASK(24, 10) +#define MI300_UMC_MCA_PC BIT(25) +#define MI300_UMC_MCA_SID GENMASK(27, 26) + +#define MI300_NA_COL_1_0 GENMASK(6, 5) +#define MI300_NA_PC BIT(7) +#define MI300_NA_COL_3_2 GENMASK(9, 8) +#define MI300_NA_BANK_3_2 GENMASK(11, 10) +#define MI300_NA_BANK_1_0 GENMASK(13, 12) +#define MI300_NA_COL_4 BIT(14) +#define MI300_NA_ROW GENMASK(28, 15) +#define MI300_NA_SID GENMASK(30, 29) + +static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) +{ + u16 i, col, row, bank, pc, sid, temp; + + col = FIELD_GET(MI300_UMC_MCA_COL, addr); + bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); + row = FIELD_GET(MI300_UMC_MCA_ROW, addr); + pc = FIELD_GET(MI300_UMC_MCA_PC, addr); + sid = FIELD_GET(MI300_UMC_MCA_SID, addr); + + /* Calculate hash for each Bank bit. */ + for (i = 0; i < NUM_BANK_BITS; i++) { + if (!addr_hash.bank[i].xor_enable) + continue; + + temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor); + temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor); + bank ^= temp << i; + } + + /* Calculate hash for PC bit. */ + if (addr_hash.pc.xor_enable) { + /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ + bank |= sid << 5; + + temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); + temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); + temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); + pc ^= temp; + + /* Drop SID bits for the sake of debug printing later. 
*/ + bank &= 0x1F; + } + + /* Reconstruct the normalized address starting with NA[4:0] = 0 */ + addr = 0; + + /* NA[6:5] = Column[1:0] */ + temp = col & 0x3; + addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); + + /* NA[7] = PC */ + addr |= FIELD_PREP(MI300_NA_PC, pc); + + /* NA[9:8] = Column[3:2] */ + temp = (col >> 2) & 0x3; + addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); + + /* NA[11:10] = Bank[3:2] */ + temp = (bank >> 2) & 0x3; + addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); + + /* NA[13:12] = Bank[1:0] */ + temp = bank & 0x3; + addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); + + /* NA[14] = Column[4] */ + temp = (col >> 4) & 0x1; + addr |= FIELD_PREP(MI300_NA_COL_4, temp); + + /* NA[28:15] = Row[13:0] */ + addr |= FIELD_PREP(MI300_NA_ROW, row); + + /* NA[30:29] = SID[1:0] */ + addr |= FIELD_PREP(MI300_NA_SID, sid); + + pr_debug("Addr=0x%016lx", addr); + pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); + + return addr; +} + +static unsigned long get_addr(unsigned long addr) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return convert_dram_to_norm_addr_mi300(addr); + + return addr; +} + #define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) static u8 get_die_id(struct atl_err *err) { @@ -82,7 +280,7 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { u8 socket_id = topology_physical_package_id(err->cpu); u8 coh_st_inst_id = get_coh_st_inst_id(err); - unsigned long addr = err->addr; + unsigned long addr = get_addr(err->addr); u8 die_id = get_die_id(err); pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", -- cgit v1.2.3 From 65c441ec582247757059e0662fb6f9ebce4965f2 Mon Sep 17 00:00:00 2001 From: Lili Li Date: Mon, 29 Jan 2024 14:20:39 +0800 Subject: EDAC/igen6: Add one more Intel Alder Lake-N SoC support Add a new Intel Alder Lake-N SoC compute die ID for EDAC support. 
Signed-off-by: Lili Li Signed-off-by: Tony Luck Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20240129062040.60809-2-qiuxu.zhuo@intel.com --- drivers/edac/igen6_edac.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 2b0ecdeba5cd..cdd8480e7368 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -238,6 +238,7 @@ static struct work_struct ecclog_work; #define DID_ADL_N_SKU9 0x4678 #define DID_ADL_N_SKU10 0x4679 #define DID_ADL_N_SKU11 0x467c +#define DID_ADL_N_SKU12 0x4632 /* Compute die IDs for Raptor Lake-P with IBECC */ #define DID_RPL_P_SKU1 0xa706 @@ -583,6 +584,7 @@ static const struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, + { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, -- cgit v1.2.3 From e77086c3750834553cf6fd2255c5f3ee04843ed8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 29 Jan 2024 14:20:40 +0800 Subject: EDAC/i10nm: Add Intel Grand Ridge micro-server support The Grand Ridge CPU model uses similar memory controller registers with Granite Rapids server. Add Grand Ridge CPU model ID for EDAC support. 
Tested-by: Ricardo Neri Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20240129062040.60809-3-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2b83d6de9352..3fd22a1eb1a9 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -951,6 +951,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); -- cgit v1.2.3 From 59950610c0c00c7a06d8a75d2ee5d73dba4274cf Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 8 Nov 2023 09:07:01 -0600 Subject: mtd: spinand: gigadevice: Fix the get ecc status issue Some GigaDevice ecc_get_status functions use on-stack buffer for spi_mem_op causes spi_mem_check_op failing, fix the issue by using spinand scratchbuf. 
Fixes: c40c7a990a46 ("mtd: spinand: Add support for GigaDevice GD5F1GQ4UExxG") Signed-off-by: Han Xu Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231108150701.593912-1-han.xu@nxp.com --- drivers/mtd/nand/spi/gigadevice.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 987710e09441..6023cba748bb 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 
4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: -- cgit v1.2.3 From 5ab9bbf6c678444dd99afabd44665e7f04047cc5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 4 Jan 2024 09:14:46 +0100 Subject: mtd: Fix possible refcounting issue when going through partition nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Under normal conditions, the loop goes over all child partitions, and 'breaks' when the relevant partition is found. In this case we get a reference to the partition node without ever releasing it. Indeed, right after the mtd_check_of_node() function returns, we call of_node_get() again over this very same node. It is probably safer to keep the counters even in this helper and call of_node_put() before break-ing. Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202312250546.ISzglvM2-lkp@intel.com/ Cc: Christian Marangi Cc: Rafał Miłecki Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240104081446.126540-1-miquel.raynal@bootlin.com --- drivers/mtd/mtdcore.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e451b28840d5..5887feb347a4 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -621,6 +621,7 @@ static void mtd_check_of_node(struct mtd_info *mtd) if (plen == mtd_name_len && !strncmp(mtd->name, pname + offset, plen)) { mtd_set_of_node(mtd, mtd_dn); + of_node_put(mtd_dn); break; } } -- cgit v1.2.3 From e6a30d0c48a1e8a68f1cc413bee65302ab03ddfb Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Mon, 5 Feb 2024 15:44:35 +0200 Subject: mtd: rawnand: marvell: fix layouts The check in nand_base.c, nand_scan_tail() : has the following code: (ecc->steps * ecc->size != mtd->writesize) which fails for some NAND chips. 
Remove ECC entries in this driver which are not integral multiplications, and adjust the number of chunks for entries which fails the above calculation so it will calculate correctly (this was previously done automatically before the check and was removed in a later commit). Fixes: 68c18dae6888 ("mtd: rawnand: marvell: add missing layouts") Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/marvell_nand.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index a46698744850..5b0f5a9cef81 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -290,16 +290,13 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = { MARVELL_LAYOUT( 2048, 512, 4, 1, 1, 2048, 32, 30, 0, 0, 0), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,32, 30), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,64, 30), - MARVELL_LAYOUT( 2048, 512, 12, 3, 2, 704, 0, 30,640, 0, 30), - MARVELL_LAYOUT( 2048, 512, 16, 5, 4, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 2048, 512, 16, 4, 4, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 4096, 512, 4, 2, 2, 2048, 32, 30, 0, 0, 0), - MARVELL_LAYOUT( 4096, 512, 8, 5, 4, 1024, 0, 30, 0, 64, 30), - MARVELL_LAYOUT( 4096, 512, 12, 6, 5, 704, 0, 30,576, 32, 30), - MARVELL_LAYOUT( 4096, 512, 16, 9, 8, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 4096, 512, 8, 4, 4, 1024, 0, 30, 0, 64, 30), + MARVELL_LAYOUT( 4096, 512, 16, 8, 8, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 8192, 512, 4, 4, 4, 2048, 0, 30, 0, 0, 0), - MARVELL_LAYOUT( 8192, 512, 8, 9, 8, 1024, 0, 30, 0, 160, 30), - MARVELL_LAYOUT( 8192, 512, 12, 12, 11, 704, 0, 30,448, 64, 30), - MARVELL_LAYOUT( 8192, 512, 16, 17, 16, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 8192, 512, 8, 8, 8, 1024, 0, 30, 0, 160, 30), + MARVELL_LAYOUT( 8192, 512, 16, 16, 16, 512, 0, 30, 0, 32, 30), }; /** -- 
cgit v1.2.3 From 8fea0c8fda30129b4168464975505d5dc9735ac1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 Feb 2024 11:34:34 -1000 Subject: usb: core: hcd: Convert from tasklet to BH workqueue The only generic interface to execute asynchronously in the BH context is tasklet; however, it's marked deprecated and has some design flaws. To replace tasklets, BH workqueue support was recently added. A BH workqueue behaves similarly to regular workqueues except that the queued work items are executed in the BH context. This patch converts usb hcd from tasklet to BH workqueue. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Alan Stern Cc: linux-usb@vger.kernel.org --- drivers/usb/core/hcd.c | 23 ++++++++++++----------- include/linux/usb/hcd.h | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 12b6dfeaf658..edf74458474a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1664,9 +1664,10 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_put_urb(urb); } -static void usb_giveback_urb_bh(struct tasklet_struct *t) +static void usb_giveback_urb_bh(struct work_struct *work) { - struct giveback_urb_bh *bh = from_tasklet(bh, t, bh); + struct giveback_urb_bh *bh = + container_of(work, struct giveback_urb_bh, bh); struct list_head local_list; spin_lock_irq(&bh->lock); @@ -1691,9 +1692,9 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t) spin_lock_irq(&bh->lock); if (!list_empty(&bh->head)) { if (bh->high_prio) - tasklet_hi_schedule(&bh->bh); + queue_work(system_bh_highpri_wq, &bh->bh); else - tasklet_schedule(&bh->bh); + queue_work(system_bh_wq, &bh->bh); } bh->running = false; spin_unlock_irq(&bh->lock); @@ -1706,7 +1707,7 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t) * @status: completion status code for the URB. * * Context: atomic. The completion callback is invoked in caller's context. 
- * For HCDs with HCD_BH flag set, the completion callback is invoked in tasklet + * For HCDs with HCD_BH flag set, the completion callback is invoked in BH * context (except for URBs submitted to the root hub which always complete in * caller's context). * @@ -1725,7 +1726,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) struct giveback_urb_bh *bh; bool running; - /* pass status to tasklet via unlinked */ + /* pass status to BH via unlinked */ if (likely(!urb->unlinked)) urb->unlinked = status; @@ -1747,9 +1748,9 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) if (running) ; else if (bh->high_prio) - tasklet_hi_schedule(&bh->bh); + queue_work(system_bh_highpri_wq, &bh->bh); else - tasklet_schedule(&bh->bh); + queue_work(system_bh_wq, &bh->bh); } EXPORT_SYMBOL_GPL(usb_hcd_giveback_urb); @@ -2540,7 +2541,7 @@ static void init_giveback_urb_bh(struct giveback_urb_bh *bh) spin_lock_init(&bh->lock); INIT_LIST_HEAD(&bh->head); - tasklet_setup(&bh->bh, usb_giveback_urb_bh); + INIT_WORK(&bh->bh, usb_giveback_urb_bh); } struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, @@ -2926,7 +2927,7 @@ int usb_add_hcd(struct usb_hcd *hcd, && device_can_wakeup(&hcd->self.root_hub->dev)) dev_dbg(hcd->self.controller, "supports USB remote wakeup\n"); - /* initialize tasklets */ + /* initialize BHs */ init_giveback_urb_bh(&hcd->high_prio_bh); hcd->high_prio_bh.high_prio = true; init_giveback_urb_bh(&hcd->low_prio_bh); @@ -3036,7 +3037,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) mutex_unlock(&usb_bus_idr_lock); /* - * tasklet_kill() isn't needed here because: + * flush_work() isn't needed here because: * - driver's disconnect() called from usb_disconnect() should * make sure its URBs are completed during the disconnect() * callback diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 00724b4f6e12..f698aac71de3 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -55,7 +55,7 @@ 
struct giveback_urb_bh { bool high_prio; spinlock_t lock; struct list_head head; - struct tasklet_struct bh; + struct work_struct bh; struct usb_host_endpoint *completing_ep; }; -- cgit v1.2.3 From 61c90765e131e63ead773b9b99167415e246a945 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 28 Dec 2023 20:55:51 +0800 Subject: md: remove redundant check of 'mddev->sync_thread' The lifetime of sync_thread: 1) Set MD_RECOVERY_NEEDED and wake up daemon thread (by ioctl/sysfs or other events); 2) Daemon thread woke up, md_check_recovery() found that MD_RECOVERY_NEEDED is set: a) try to grab reconfig_mutex; b) set MD_RECOVERY_RUNNING; c) clear MD_RECOVERY_NEEDED, and then queue sync_work; 3) md_start_sync() choose sync_action, then register sync_thread; 4) md_do_sync() is done, set MD_RECOVERY_DONE and wake up daemon thread; 5) Daemon thread woke up, md_check_recovery() found that MD_RECOVERY_DONE is set: a) try to grab reconfig_mutex; b) unregister sync_thread; c) clear MD_RECOVERY_RUNNING and MD_RECOVERY_DONE; Hence there is no such case that MD_RECOVERY_RUNNING is not set, while sync_thread is registered. 
Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231228125553.2697765-2-yukuai1@huaweicloud.com --- drivers/md/md.c | 14 ++++---------- drivers/md/raid5.c | 6 ++---- 2 files changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 2266358d8074..9b6b34ab9d1d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3339,8 +3339,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev, if (kstrtoull(buf, 10, &new_offset) < 0) return -EINVAL; - if (mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING,&mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; if (new_offset == rdev->data_offset) /* reset is always permitted */ @@ -4013,8 +4012,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) */ rv = -EBUSY; - if (mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || mddev->reshape_position != MaxSector || mddev->sysfs_active) goto out_unlock; @@ -6408,7 +6406,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || - mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); err = -EBUSY; @@ -6461,7 +6458,6 @@ static int do_md_stop(struct mddev *mddev, int mode, mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || - mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); @@ -7307,8 +7303,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) * of each device. If num_sectors is zero, we find the largest size * that fits. 
*/ - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - mddev->sync_thread) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; if (!md_is_rdwr(mddev)) return -EROFS; @@ -7345,8 +7340,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) if (raid_disks <= 0 || (mddev->max_disks && raid_disks >= mddev->max_disks)) return -EINVAL; - if (mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) || mddev->reshape_position != MaxSector) return -EBUSY; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8497880135ee..14f2cf75abbd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6967,10 +6967,8 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) pr_debug("md/raid: change stripe_size from %lu to %lu\n", conf->stripe_size, new); - if (mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - mddev->reshape_position != MaxSector || - mddev->sysfs_active) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + mddev->reshape_position != MaxSector || mddev->sysfs_active) { err = -EBUSY; goto out_unlock; } -- cgit v1.2.3 From faeaf210a559eb05bc1a294082d100d01c49a1e9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 28 Dec 2023 20:55:52 +0800 Subject: md: remove redundant md_wakeup_thread() On the one hand, mddev_unlock() will call md_wakeup_thread() unconditionally; on the other hand, md_check_recovery() can't make progress if 'reconfig_mutex' can't be grabbed. Hence, it really doesn't make sense to wake up daemon thread while 'reconfig_mutex' is still grabbed. Remove all the md_wakeup_thread() for 'mddev->thread' while 'reconfig_mutex' is still grabbed.
Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231228125553.2697765-3-yukuai1@huaweicloud.com --- drivers/md/md.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9b6b34ab9d1d..0b132ee2672e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2929,7 +2929,6 @@ static int add_bound_rdev(struct md_rdev *rdev) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_new_event(); - md_wakeup_thread(mddev->thread); return 0; } @@ -3044,10 +3043,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) if (err == 0) { md_kick_rdev_from_array(rdev); - if (mddev->pers) { + if (mddev->pers) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - md_wakeup_thread(mddev->thread); - } md_new_event(); } } @@ -3077,7 +3074,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(BlockedBadBlocks, &rdev->flags); wake_up(&rdev->blocked_wait); set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); err = 0; } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { @@ -3115,7 +3111,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) !test_bit(Replacement, &rdev->flags)) set_bit(WantReplacement, &rdev->flags); set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); err = 0; } else if (cmd_match(buf, "-want_replacement")) { /* Clearing 'want_replacement' is always allowed. @@ -3245,7 +3240,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->raid_disk >= 0) return -EBUSY; set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); } else if (rdev->mddev->pers) { /* Activating a spare .. or possibly reactivating * if we ever get bitmaps working here. 
@@ -6186,7 +6180,6 @@ int do_md_run(struct mddev *mddev) /* run start up tasks that require md_thread */ md_start(mddev); - md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); @@ -6207,7 +6200,6 @@ int md_start(struct mddev *mddev) if (mddev->pers->start) { set_bit(MD_RECOVERY_WAIT, &mddev->recovery); - md_wakeup_thread(mddev->thread); ret = mddev->pers->start(mddev); clear_bit(MD_RECOVERY_WAIT, &mddev->recovery); md_wakeup_thread(mddev->sync_thread); @@ -6252,7 +6244,6 @@ static int restart_array(struct mddev *mddev) pr_debug("md: %s switched to read-write mode.\n", mdname(mddev)); /* Kick recovery or resync if necessary */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); sysfs_notify_dirent_safe(mddev->sysfs_state); return 0; @@ -6396,7 +6387,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { did_freeze = 1; set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - md_wakeup_thread(mddev->thread); } stop_sync_thread(mddev, false, false); @@ -6428,7 +6418,6 @@ out: if ((mddev->pers && !err) || did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); sysfs_notify_dirent_safe(mddev->sysfs_state); } @@ -6450,7 +6439,6 @@ static int do_md_stop(struct mddev *mddev, int mode, if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { did_freeze = 1; set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - md_wakeup_thread(mddev->thread); } stop_sync_thread(mddev, true, false); @@ -6464,7 +6452,6 @@ static int do_md_stop(struct mddev *mddev, int mode, if (did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); } return -EBUSY; } @@ -7005,9 +6992,7 @@ 
kick_rdev: md_kick_rdev_from_array(rdev); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - if (mddev->thread) - md_wakeup_thread(mddev->thread); - else + if (!mddev->thread) md_update_sb(mddev, 1); md_new_event(); @@ -7089,7 +7074,6 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) * array immediately. */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); md_new_event(); return 0; -- cgit v1.2.3 From 9cfcf99e7ed613e6b3697e1c1034a24487ec3154 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Fri, 29 Dec 2023 15:05:00 +0800 Subject: md: get rdev->mddev with READ_ONCE() Users may get rdev->mddev by sysfs while rdev is releasing. So use both READ_ONCE() and WRITE_ONCE() to prevent load/store tearing and to read/write mddev atomically. Signed-off-by: Li Lingfeng Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231229070500.3602712-1-lilingfeng@huaweicloud.com --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0b132ee2672e..5eff8e84cddf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2591,7 +2591,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) list_del_rcu(&rdev->same_set); pr_debug("md: unbind<%pg>\n", rdev->bdev); mddev_destroy_serial_pool(rdev->mddev, rdev); - rdev->mddev = NULL; + WRITE_ONCE(rdev->mddev, NULL); sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); sysfs_put(rdev->sysfs_unack_badblocks); @@ -3664,7 +3664,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, struct kernfs_node *kn = NULL; bool suspend = false; ssize_t rv; - struct mddev *mddev = rdev->mddev; + struct mddev *mddev = READ_ONCE(rdev->mddev); if (!entry->store) return -EIO; -- cgit v1.2.3 From 570b9147deb6b07b955b55e06c714ca12a5f3e16 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 4 Jan 2024 21:36:29 +0800 Subject: md: use RCU lock to protect traversal in 
md_spares_need_change() Since md_start_sync() will be called without the protection of mddev_lock, and it can run concurrently with array reconfiguration, traversal of rdev in it should be protected by RCU lock. Commit bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") added md_spares_need_change() to md_start_sync(), causing use of rdev without any protection. Fix this by adding RCU lock in md_spares_need_change(). Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") Cc: stable@vger.kernel.org # 6.7+ Signed-off-by: Li Lingfeng Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240104133629.1277517-1-lilingfeng@huaweicloud.com --- drivers/md/md.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 5eff8e84cddf..45bb387d69c4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9240,9 +9240,14 @@ static bool md_spares_need_change(struct mddev *mddev) { struct md_rdev *rdev; - rdev_for_each(rdev, mddev) - if (rdev_removeable(rdev) || rdev_addable(rdev)) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev_removeable(rdev) || rdev_addable(rdev)) { + rcu_read_unlock(); return true; + } + } + rcu_read_unlock(); return false; } -- cgit v1.2.3 From 95b77082b790633129e318efde85a51571570395 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 1 Feb 2024 22:45:49 +0000 Subject: md/linear: Get rid of md-linear.h Given that 849d18e27be9 ("md: Remove deprecated CONFIG_MD_LINEAR") killed the linear flavour of MD, it seems only logical to drop the leftover include file that used to come with it. I also feel that it should be my own privilege to remove my 30 year old attempt at writing kernel code ;-). RIP!
Cc: Song Liu Cc: Yu Kuai Signed-off-by: Marc Zyngier Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201224549.750644-1-maz@kernel.org --- drivers/md/md-linear.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 drivers/md/md-linear.h (limited to 'drivers') diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h deleted file mode 100644 index 5587eeedb882..000000000000 --- a/drivers/md/md-linear.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINEAR_H -#define _LINEAR_H - -struct dev_info { - struct md_rdev *rdev; - sector_t end_sector; -}; - -struct linear_conf -{ - struct rcu_head rcu; - sector_t array_sectors; - int raid_disks; /* a copy of mddev->raid_disks */ - struct dev_info disks[] __counted_by(raid_disks); -}; -#endif -- cgit v1.2.3 From 83cbdaf61b1ab9cdaa0321eeea734bc70ca069c8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 5 Feb 2024 15:34:39 -0800 Subject: md/multipath: Remove md-multipath.h md-multipath is already deprecated. Remove the header file. Signed-off-by: Song Liu --- drivers/md/md-multipath.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 drivers/md/md-multipath.h (limited to 'drivers') diff --git a/drivers/md/md-multipath.h b/drivers/md/md-multipath.h deleted file mode 100644 index b3099e5fc4d7..000000000000 --- a/drivers/md/md-multipath.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MULTIPATH_H -#define _MULTIPATH_H - -struct multipath_info { - struct md_rdev *rdev; -}; - -struct mpconf { - struct mddev *mddev; - struct multipath_info *multipaths; - int raid_disks; - spinlock_t device_lock; - struct list_head retry_list; - - mempool_t pool; -}; - -/* - * this is our 'private' 'collective' MULTIPATH buffer head. 
- * it contains information about what kind of IO operations were started - * for this MULTIPATH operation, and about their status: - */ - -struct multipath_bh { - struct mddev *mddev; - struct bio *master_bio; - struct bio bio; - int path; - struct list_head retry_list; -}; -#endif -- cgit v1.2.3 From 052618c71c66d5de5e9b6cbcbad26932d951919c Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sun, 4 Feb 2024 12:31:42 -0300 Subject: block: rbd: make rbd_bus_type const Now that the driver core can properly handle constant struct bus_type, move the rbd_bus_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/20240204-bus_cleanup-block-v1-1-fc77afd8d7cc@marliere.net Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 12b5d53ec856..00ca8a1d8c46 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -575,7 +575,7 @@ static const struct attribute_group rbd_bus_group = { }; __ATTRIBUTE_GROUPS(rbd_bus); -static struct bus_type rbd_bus_type = { +static const struct bus_type rbd_bus_type = { .name = "rbd", .bus_groups = rbd_bus_groups, }; -- cgit v1.2.3 From cd665bfc757c71e9b7e0abff0f362d8abd38a805 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:57 +0100 Subject: dmaengine: dw-edma: Fix the ch_count hdma callback The current check of ch_en enabled to know the maximum number of available hardware channels is wrong as it check the number of ch_en register set but all of them are unset at probe. This register is set at the dw_hdma_v0_core_start function which is run lately before a DMA transfer. 
The HDMA IP have no way to know the number of hardware channels available like the eDMA IP, then let set it to maximum channels and let the platform set the right number of channels. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Acked-by: Manivannan Sadhasivam Reviewed-by: Serge Semin Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-1-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 00b735a0202a..1f4cb7db5475 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -65,18 +65,12 @@ static void dw_hdma_v0_core_off(struct dw_edma *dw) static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) { - u32 num_ch = 0; - int id; - - for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) { - if (GET_CH_32(dw, id, dir, ch_en) & BIT(0)) - num_ch++; - } - - if (num_ch > HDMA_V0_MAX_NR_CH) - num_ch = HDMA_V0_MAX_NR_CH; - - return (u16)num_ch; + /* + * The HDMA IP have no way to know the number of hardware channels + * available, we set it to maximum channels and let the platform + * set the right number of channels. + */ + return HDMA_V0_MAX_NR_CH; } static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan) -- cgit v1.2.3 From 7b52ba8616e978bf4f38f207f11a8176517244d0 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:58 +0100 Subject: dmaengine: dw-edma: Fix wrong interrupt bit set for HDMA Instead of setting HDMA_V0_LOCAL_ABORT_INT_EN bit, HDMA_V0_LOCAL_STOP_INT_EN bit got set twice, due to which the abort interrupt is not getting generated for HDMA. Fix it by setting the correct interrupt enable bit. 
Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-2-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 1f4cb7db5475..108f9127aaaa 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -236,7 +236,7 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Interrupt enable&unmask - done, abort */ tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | - HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN; + HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); /* Channel control */ SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); -- cgit v1.2.3 From 930a8a015dcfde4b8906351ff081066dc277748c Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:59 +0100 Subject: dmaengine: dw-edma: HDMA_V0_REMOTEL_STOP_INT_EN typo fix Fix "HDMA_V0_REMOTEL_STOP_INT_EN" typo error Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-3-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h index a974abdf8aaf..eab5fd7177e5 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h +++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h @@ 
-15,7 +15,7 @@ #define HDMA_V0_LOCAL_ABORT_INT_EN BIT(6) #define HDMA_V0_REMOTE_ABORT_INT_EN BIT(5) #define HDMA_V0_LOCAL_STOP_INT_EN BIT(4) -#define HDMA_V0_REMOTEL_STOP_INT_EN BIT(3) +#define HDMA_V0_REMOTE_STOP_INT_EN BIT(3) #define HDMA_V0_ABORT_INT_MASK BIT(2) #define HDMA_V0_STOP_INT_MASK BIT(0) #define HDMA_V0_LINKLIST_EN BIT(0) -- cgit v1.2.3 From e2f6a5789051ee9c632f27a12d0f01f0cbf78aac Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:00 +0100 Subject: dmaengine: dw-edma: Add HDMA remote interrupt configuration Only the local interruption was configured, remote interrupt was left behind. This patch fix it by setting stop and abort remote interrupts when the DW_EDMA_CHIP_LOCAL flag is not set. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Signed-off-by: Kory Maincent Reviewed-by: Serge Semin Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-4-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 108f9127aaaa..04b0bcb6ded9 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -237,6 +237,8 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; + if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); /* Channel control */ SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); -- cgit v1.2.3 From 712a92a48158e02155b4b6b21e03a817f78c9b7e Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:01 +0100 
Subject: dmaengine: dw-edma: HDMA: Add sync read before starting the DMA transfer in remote setup The Linked list element and pointer are not stored in the same memory as the HDMA controller register. If the doorbell register is toggled before the full write of the linked list a race condition error will occur. In remote setup we can only use a readl to the memory to assure the full write has occurred. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-5-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 04b0bcb6ded9..10e8f0715114 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -222,6 +222,20 @@ static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote HDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. 
Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -252,6 +266,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Set consumer cycle */ SET_CH_32(dw, chan->dir, chan->id, cycle_sync, HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); + + dw_hdma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START); } -- cgit v1.2.3 From bbcc1c83f343e580c3aa1f2a8593343bf7b55bba Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:02 +0100 Subject: dmaengine: dw-edma: eDMA: Add sync read before starting the DMA transfer in remote setup The Linked list element and pointer are not stored in the same memory as the eDMA controller register. If the doorbell register is toggled before the full write of the linked list a race condition error will occur. In remote setup we can only use a readl to the memory to assure the full write has occurred. 
Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-6-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-v0-core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index b38786f0ad79..b75fdaffad9a 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -346,6 +346,20 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_edma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote eDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. 
Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -412,6 +426,9 @@ static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) SET_CH_32(dw, chan->dir, chan->id, llp.msb, upper_32_bits(chunk->ll_region.paddr)); } + + dw_edma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_RW_32(dw, chan->dir, doorbell, FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id)); -- cgit v1.2.3 From 9d739bccf261dd93ec1babf82f5c5d71dd4caa3e Mon Sep 17 00:00:00 2001 From: Peng Ma Date: Thu, 1 Feb 2024 16:50:07 -0500 Subject: dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read There is chip (ls1028a) errata: The SoC may hang on 16 byte unaligned read transactions by QDMA. Unaligned read transactions initiated by QDMA may stall in the NOC (Network On-Chip), causing a deadlock condition. Stalled transactions will trigger completion timeouts in PCIe controller. Workaround: Enable prefetch by setting the source descriptor prefetchable bit ( SD[PF] = 1 ). Implement this workaround. 
Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Peng Ma Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201215007.439503-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index f405c77060ad..70e8b7d425c8 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << -- cgit v1.2.3 From 87a39071e0b639f45e05d296cc0538eef44ec0bd Mon Sep 17 00:00:00 2001 From: Curtis Klein Date: Thu, 1 Feb 2024 17:04:06 -0500 Subject: dmaengine: fsl-qdma: init irq after reg initialization Initialize the qDMA irqs after the registers are configured so that interrupts that may have been pending from a primary kernel don't get processed by the irq handler before it is ready to and cause panic with the following trace: Call trace: fsl_qdma_queue_handler+0xf8/0x3e8 __handle_irq_event_percpu+0x78/0x2b0 handle_irq_event_percpu+0x1c/0x68 handle_irq_event+0x44/0x78 handle_fasteoi_irq+0xc8/0x178 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x90/0x100 gic_handle_irq+0x5c/0xb8 el1_irq+0xb8/0x180 _raw_spin_unlock_irqrestore+0x14/0x40 __setup_irq+0x4bc/0x798 request_threaded_irq+0xd8/0x190 devm_request_threaded_irq+0x74/0xe8 fsl_qdma_probe+0x4d4/0xca8 
platform_drv_probe+0x50/0xa0 really_probe+0xe0/0x3f8 driver_probe_device+0x64/0x130 device_driver_attach+0x6c/0x78 __driver_attach+0xbc/0x158 bus_for_each_dev+0x5c/0x98 driver_attach+0x20/0x28 bus_add_driver+0x158/0x220 driver_register+0x60/0x110 __platform_driver_register+0x44/0x50 fsl_qdma_driver_init+0x18/0x20 do_one_initcall+0x48/0x258 kernel_init_freeable+0x1a4/0x23c kernel_init+0x10/0xf8 ret_from_fork+0x10/0x18 Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Curtis Klein Signed-off-by: Yi Zhao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201220406.440145-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 70e8b7d425c8..1e3bf6f30f78 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -1198,10 +1198,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1240,16 +1236,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); + ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } - ret = fsl_qdma_reg_init(fsl_qdma); + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); if (ret) { - dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); return ret; } -- 
cgit v1.2.3 From 9ba17defd9edd87970b701085402bc8ecc3a11d4 Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Wed, 31 Jan 2024 11:33:18 -0500 Subject: dmaengine: fsl-edma: correct calculation of 'nbytes' in multi-fifo scenario The 'nbytes' should be equivalent to burst * width in audio multi-fifo setups. Given that the FIFO width is fixed at 32 bits, adjusts the burst size for multi-fifo configurations to match the slave maxburst in the configuration. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Joy Zou Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240131163318.360315-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index b53f46245c37..793f1a7ad5e3 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -503,7 +503,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, if (fsl_chan->is_multi_fifo) { /* set mloff to support multiple fifo */ burst = cfg->direction == DMA_DEV_TO_MEM ? - cfg->src_addr_width : cfg->dst_addr_width; + cfg->src_maxburst : cfg->dst_maxburst; nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4)); /* enable DMLOE/SMLOE */ if (cfg->direction == DMA_MEM_TO_DEV) { -- cgit v1.2.3 From 7936378cb6d87073163130e1e1fc1e5f76a597cf Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:33:43 +0100 Subject: phy: freescale: phy-fsl-imx8-mipi-dphy: Fix alias name to use dashes Devicetree spec lists only dashes as valid characters for alias names. 
Table 3.2: Valid characters for alias names, Devicetree Specification, Release v0.4 Signed-off-by: Alexander Stein Fixes: 3fbae284887de ("phy: freescale: phy-fsl-imx8-mipi-dphy: Add i.MX8qxp LVDS PHY mode support") Link: https://lore.kernel.org/r/20240110093343.468810-1-alexander.stein@ew.tq-group.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bf..0928a526e2ab 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); -- cgit v1.2.3 From 95055beb067cb30f626fb10f7019737ca7681df0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:13:45 +0800 Subject: phy: qcom: phy-qcom-m31: fix wrong pointer pass to PTR_ERR() It should be 'qphy->vreg' passed to PTR_ERR() when devm_regulator_get() fails.
Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824091345.1072650-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index c2590579190a..03fb0d4b75d7 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -299,7 +299,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); -- cgit v1.2.3 From 734550d60cdf634299f0eac7f7fe15763ed990bb Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 1 Feb 2024 10:39:33 +0200 Subject: phy: qualcomm: eusb2-repeater: Rework init to drop redundant zero-out loop Instead of incrementing the base of the global reg fields, which renders the second instance of the repeater broken due to wrong offsets, use regmap with base and offset. As for zeroing out the rest of the tuning regs, avoid looping though the table and just use the table as is, as it is already zero initialized. 
Fixes: 99a517a582fc ("phy: qualcomm: phy-qcom-eusb2-repeater: Zero out untouched tuning regs") Tested-by: Elliot Berman # sm8650-qrd Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20240201-phy-qcom-eusb2-repeater-fixes-v4-1-cf18c8cef6d7@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c | 166 +++++++++---------------- 1 file changed, 62 insertions(+), 104 deletions(-) (limited to 'drivers') diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index a623f092b11f..a43e20abb10d 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -37,56 +37,28 @@ #define EUSB2_TUNE_EUSB_EQU 0x5A #define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B -#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \ - { \ - .reg = r, \ - .val = v, \ - } - -enum reg_fields { - F_TUNE_EUSB_HS_COMP_CUR, - F_TUNE_EUSB_EQU, - F_TUNE_EUSB_SLEW, - F_TUNE_USB2_HS_COMP_CUR, - F_TUNE_USB2_PREEM, - F_TUNE_USB2_EQU, - F_TUNE_USB2_SLEW, - F_TUNE_SQUELCH_U, - F_TUNE_HSDISC, - F_TUNE_RES_FSDIF, - F_TUNE_IUSB2, - F_TUNE_USB2_CROSSOVER, - F_NUM_TUNE_FIELDS, - - F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS, - F_FORCE_EN_5, - - F_EN_CTL1, - - F_RPTR_STATUS, - F_NUM_FIELDS, -}; - -static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { - [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1), - [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1), - [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1), - [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1), - [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2), - [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1), - [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1), - [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2), - [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2), - [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2), - 
[F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3), - [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2), - - [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7), - [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7), - - [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7), - - [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7), +enum eusb2_reg_layout { + TUNE_EUSB_HS_COMP_CUR, + TUNE_EUSB_EQU, + TUNE_EUSB_SLEW, + TUNE_USB2_HS_COMP_CUR, + TUNE_USB2_PREEM, + TUNE_USB2_EQU, + TUNE_USB2_SLEW, + TUNE_SQUELCH_U, + TUNE_HSDISC, + TUNE_RES_FSDIF, + TUNE_IUSB2, + TUNE_USB2_CROSSOVER, + NUM_TUNE_FIELDS, + + FORCE_VAL_5 = NUM_TUNE_FIELDS, + FORCE_EN_5, + + EN_CTL1, + + RPTR_STATUS, + LAYOUT_SIZE, }; struct eusb2_repeater_cfg { @@ -98,10 +70,11 @@ struct eusb2_repeater_cfg { struct eusb2_repeater { struct device *dev; - struct regmap_field *regs[F_NUM_FIELDS]; + struct regmap *regmap; struct phy *phy; struct regulator_bulk_data *vregs; const struct eusb2_repeater_cfg *cfg; + u32 base; enum phy_mode mode; }; @@ -109,10 +82,10 @@ static const char * const pm8550b_vreg_l[] = { "vdd18", "vdd3", }; -static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = { - [F_TUNE_IUSB2] = 0x8, - [F_TUNE_SQUELCH_U] = 0x3, - [F_TUNE_USB2_PREEM] = 0x5, +static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = { + [TUNE_IUSB2] = 0x8, + [TUNE_SQUELCH_U] = 0x3, + [TUNE_USB2_PREEM] = 0x5, }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -140,47 +113,42 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr) static int eusb2_repeater_init(struct phy *phy) { - struct reg_field *regfields = eusb2_repeater_tune_reg_fields; struct eusb2_repeater *rptr = phy_get_drvdata(phy); struct device_node *np = rptr->dev->of_node; - u32 init_tbl[F_NUM_TUNE_FIELDS] = { 0 }; - u8 override; + struct regmap *regmap = rptr->regmap; + const u32 *init_tbl = rptr->cfg->init_tbl; + u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM]; + u8 tune_hsdisc = init_tbl[TUNE_HSDISC]; + u8 tune_iusb2 = 
init_tbl[TUNE_IUSB2]; + u32 base = rptr->base; u32 val; int ret; - int i; + + of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2); + of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc); + of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem); ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs); if (ret) return ret; - regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN); + regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN); - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) { - if (init_tbl[i]) { - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - } else { - /* Write 0 if there's no value set */ - u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb); - - regmap_field_update_bits(rptr->regs[i], mask, 0); - } - } - memcpy(init_tbl, rptr->cfg->init_tbl, sizeof(init_tbl)); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]); + regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]); - if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &override)) - init_tbl[F_TUNE_IUSB2] = override; + regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem); + regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc); + regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2); - if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &override)) - 
init_tbl[F_TUNE_HSDISC] = override; - - if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &override)) - init_tbl[F_TUNE_USB2_PREEM] = override; - - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - - ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS], - val, val & RPTR_OK, 10, 5); + ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5); if (ret) dev_err(rptr->dev, "initialization timed-out\n"); @@ -191,6 +159,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct eusb2_repeater *rptr = phy_get_drvdata(phy); + struct regmap *regmap = rptr->regmap; + u32 base = rptr->base; switch (mode) { case PHY_MODE_USB_HOST: @@ -199,10 +169,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * per eUSB 1.2 Spec. Below implement software workaround until * PHY and controller is fixing seen observation. */ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, F_CLK_19P2M_EN); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, V_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, F_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, V_CLK_19P2M_EN); break; case PHY_MODE_USB_DEVICE: /* @@ -211,10 +179,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * repeater doesn't clear previous value due to shared * regulators (say host <-> device mode switch). 
*/ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, 0); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, 0); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, 0); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, 0); break; default: return -EINVAL; @@ -243,9 +209,8 @@ static int eusb2_repeater_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct phy_provider *phy_provider; struct device_node *np = dev->of_node; - struct regmap *regmap; - int i, ret; u32 res; + int ret; rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL); if (!rptr) @@ -258,22 +223,15 @@ static int eusb2_repeater_probe(struct platform_device *pdev) if (!rptr->cfg) return -EINVAL; - regmap = dev_get_regmap(dev->parent, NULL); - if (!regmap) + rptr->regmap = dev_get_regmap(dev->parent, NULL); + if (!rptr->regmap) return -ENODEV; ret = of_property_read_u32(np, "reg", &res); if (ret < 0) return ret; - for (i = 0; i < F_NUM_FIELDS; i++) - eusb2_repeater_tune_reg_fields[i].reg += res; - - ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs, - eusb2_repeater_tune_reg_fields, - F_NUM_FIELDS); - if (ret) - return ret; + rptr->base = res; ret = eusb2_repeater_init_vregs(rptr); if (ret < 0) { -- cgit v1.2.3 From 576bd4962f19bb8f437f8cecbb25e4202438c41e Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Thu, 1 Feb 2024 02:04:49 +0100 Subject: x86/kvm, ptp/kvm: Add clocksource ID, set system_counterval_t.cs_id Add a clocksource ID for the x86 kvmclock. Also, for ptp_kvm, set the recently added struct system_counterval_t member cs_id to the clocksource ID (x86 kvmclock or ARM Generic Timer). In the future, get_device_system_crosststamp() will compare the clocksource ID in struct system_counterval_t, rather than the clocksource. For now, to avoid touching too many subsystems at once, extract the clocksource ID from the clocksource. The clocksource dereference will be removed once everything is converted over.
Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240201010453.2212371-5-peter.hilber@opensynergy.com --- arch/x86/kernel/kvmclock.c | 1 + drivers/ptp/ptp_kvm_common.c | 2 ++ include/linux/clocksource_ids.h | 1 + 3 files changed, 4 insertions(+) (limited to 'drivers') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 5bb395551c44..2f1bbf730f45 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -160,6 +160,7 @@ struct clocksource kvm_clock = { .rating = 400, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .id = CSID_X86_KVM_CLK, .enable = kvm_cs_enable, }; EXPORT_SYMBOL_GPL(kvm_clock); diff --git a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c index 2418977989be..b0b36f135347 100644 --- a/drivers/ptp/ptp_kvm_common.c +++ b/drivers/ptp/ptp_kvm_common.c @@ -4,6 +4,7 @@ * * Copyright (C) 2017 Red Hat Inc. */ +#include #include #include #include @@ -47,6 +48,7 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time, system_counter->cycles = cycle; system_counter->cs = cs; + system_counter->cs_id = cs->id; *device_time = timespec64_to_ktime(tspec); diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index f8467946e9ee..a4fa3436940c 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -8,6 +8,7 @@ enum clocksource_ids { CSID_ARM_ARCH_COUNTER, CSID_X86_TSC_EARLY, CSID_X86_TSC, + CSID_X86_KVM_CLK, CSID_MAX, }; -- cgit v1.2.3 From 9be3b2f057d7a6752e8cf25c1d456198b4d3bd6a Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Thu, 1 Feb 2024 02:04:50 +0100 Subject: ptp/kvm, arm_arch_timer: Set system_counterval_t.cs_id to constant Identify the clocksources used by ptp_kvm by setting the clocksource ID enum constants. This avoids dereferencing struct clocksource. 
Once the system_counterval_t.cs member will be removed, this will also avoid the need to obtain clocksource pointers from kvm_arch_ptp_get_crosststamp(). The clocksource IDs are associated to timestamps requested from the KVM hypervisor, so the proper clocksource ID is known at the ptp_kvm request site. While at it, also make the ptp_kvm_get_time_fn() 'ret' variable type int as that's what the function return value is. Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240201010453.2212371-6-peter.hilber@opensynergy.com --- drivers/clocksource/arm_arch_timer.c | 5 ++++- drivers/ptp/ptp_kvm_arm.c | 2 +- drivers/ptp/ptp_kvm_common.c | 10 +++++----- drivers/ptp/ptp_kvm_x86.c | 4 +++- include/linux/ptp_kvm.h | 4 +++- 5 files changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index e054de92de91..45a02872669e 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1807,7 +1807,8 @@ TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); #endif int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts, - struct clocksource **cs) + struct clocksource **cs, + enum clocksource_ids *cs_id) { struct arm_smccc_res hvc_res; u32 ptp_counter; @@ -1833,6 +1834,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts, *cycle = (u64)hvc_res.a2 << 32 | hvc_res.a3; if (cs) *cs = &clocksource_counter; + if (cs_id) + *cs_id = CSID_ARM_ARCH_COUNTER; return 0; } diff --git a/drivers/ptp/ptp_kvm_arm.c b/drivers/ptp/ptp_kvm_arm.c index e68e6943167b..017bb5f03b14 100644 --- a/drivers/ptp/ptp_kvm_arm.c +++ b/drivers/ptp/ptp_kvm_arm.c @@ -28,5 +28,5 @@ void kvm_arch_ptp_exit(void) int kvm_arch_ptp_get_clock(struct timespec64 *ts) { - return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL); + return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL, NULL); } diff --git 
a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c index b0b36f135347..f6683ba0ab3c 100644 --- a/drivers/ptp/ptp_kvm_common.c +++ b/drivers/ptp/ptp_kvm_common.c @@ -4,7 +4,6 @@ * * Copyright (C) 2017 Red Hat Inc. */ -#include #include #include #include @@ -29,15 +28,16 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time, struct system_counterval_t *system_counter, void *ctx) { - long ret; - u64 cycle; + enum clocksource_ids cs_id; struct timespec64 tspec; struct clocksource *cs; + u64 cycle; + int ret; spin_lock(&kvm_ptp_lock); preempt_disable_notrace(); - ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs); + ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs, &cs_id); if (ret) { spin_unlock(&kvm_ptp_lock); preempt_enable_notrace(); @@ -48,7 +48,7 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time, system_counter->cycles = cycle; system_counter->cs = cs; - system_counter->cs_id = cs->id; + system_counter->cs_id = cs_id; *device_time = timespec64_to_ktime(tspec); diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c index 902844cc1a17..2782442922cb 100644 --- a/drivers/ptp/ptp_kvm_x86.c +++ b/drivers/ptp/ptp_kvm_x86.c @@ -93,7 +93,8 @@ int kvm_arch_ptp_get_clock(struct timespec64 *ts) } int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, - struct clocksource **cs) + struct clocksource **cs, + enum clocksource_ids *cs_id) { struct pvclock_vcpu_time_info *src; unsigned int version; @@ -124,6 +125,7 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, } while (pvclock_read_retry(src, version)); *cs = &kvm_clock; + *cs_id = CSID_X86_KVM_CLK; return 0; } diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 746fd67c3480..95b3d4d0d7dd 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,6 +8,7 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include #include struct timespec64; @@ -17,6 +18,7 @@ int kvm_arch_ptp_init(void); void kvm_arch_ptp_exit(void); int 
kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs); + struct timespec64 *tspec, struct clocksource **cs, + enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ -- cgit v1.2.3 From b152688c91313ab4073cff4a5e63ff4cc491c358 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Thu, 1 Feb 2024 02:04:52 +0100 Subject: treewide: Remove system_counterval_t.cs, which is never read The clocksource pointer in struct system_counterval_t is not evaluated any more. Remove the code setting the member, and the member itself. Signed-off-by: Peter Hilber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240201010453.2212371-8-peter.hilber@opensynergy.com --- arch/x86/kernel/tsc.c | 14 ++------------ drivers/clocksource/arm_arch_timer.c | 3 --- drivers/ptp/ptp_kvm_arm.c | 2 +- drivers/ptp/ptp_kvm_common.c | 4 +--- drivers/ptp/ptp_kvm_x86.c | 2 -- include/linux/ptp_kvm.h | 4 +--- include/linux/timekeeping.h | 3 --- 7 files changed, 5 insertions(+), 27 deletions(-) (limited to 'drivers') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fd567a0ac90e..5a69a49acc96 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -53,7 +53,6 @@ static int __read_mostly tsc_force_recalibrate; static u32 art_to_tsc_numerator; static u32 art_to_tsc_denominator; static u64 art_to_tsc_offset; -static struct clocksource *art_related_clocksource; static bool have_art; struct cyc2ns { @@ -1313,7 +1312,6 @@ struct system_counterval_t convert_art_to_tsc(u64 art) res += tmp + art_to_tsc_offset; return (struct system_counterval_t) { - .cs = art_related_clocksource, .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, .cycles = res, }; @@ -1337,9 +1335,6 @@ EXPORT_SYMBOL(convert_art_to_tsc); * corresponding clocksource: * cycles: System counter value * cs_id: The clocksource ID for validating comparability - * cs: Clocksource corresponding to system counter value. 
Used - * by timekeeping code to verify comparability of two cycle - * values. */ struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) @@ -1355,7 +1350,6 @@ struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) res += tmp; return (struct system_counterval_t) { - .cs = art_related_clocksource, .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, .cycles = res, }; @@ -1464,10 +1458,8 @@ out: if (tsc_unstable) goto unreg; - if (boot_cpu_has(X86_FEATURE_ART)) { - art_related_clocksource = &clocksource_tsc; + if (boot_cpu_has(X86_FEATURE_ART)) have_art = true; - } clocksource_register_khz(&clocksource_tsc, tsc_khz); unreg: clocksource_unregister(&clocksource_tsc_early); @@ -1492,10 +1484,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { - if (boot_cpu_has(X86_FEATURE_ART)) { - art_related_clocksource = &clocksource_tsc; + if (boot_cpu_has(X86_FEATURE_ART)) have_art = true; - } clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_unregister(&clocksource_tsc_early); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 45a02872669e..8d4a52056684 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1807,7 +1807,6 @@ TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); #endif int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts, - struct clocksource **cs, enum clocksource_ids *cs_id) { struct arm_smccc_res hvc_res; @@ -1832,8 +1831,6 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts, *ts = ktime_to_timespec64(ktime); if (cycle) *cycle = (u64)hvc_res.a2 << 32 | hvc_res.a3; - if (cs) - *cs = &clocksource_counter; if (cs_id) *cs_id = CSID_ARM_ARCH_COUNTER; diff --git a/drivers/ptp/ptp_kvm_arm.c b/drivers/ptp/ptp_kvm_arm.c index 017bb5f03b14..e68e6943167b 100644 --- a/drivers/ptp/ptp_kvm_arm.c +++ 
b/drivers/ptp/ptp_kvm_arm.c @@ -28,5 +28,5 @@ void kvm_arch_ptp_exit(void) int kvm_arch_ptp_get_clock(struct timespec64 *ts) { - return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL, NULL); + return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL); } diff --git a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c index f6683ba0ab3c..15ccb7dd2ed0 100644 --- a/drivers/ptp/ptp_kvm_common.c +++ b/drivers/ptp/ptp_kvm_common.c @@ -30,14 +30,13 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time, { enum clocksource_ids cs_id; struct timespec64 tspec; - struct clocksource *cs; u64 cycle; int ret; spin_lock(&kvm_ptp_lock); preempt_disable_notrace(); - ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs, &cs_id); + ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs_id); if (ret) { spin_unlock(&kvm_ptp_lock); preempt_enable_notrace(); @@ -47,7 +46,6 @@ static int ptp_kvm_get_time_fn(ktime_t *device_time, preempt_enable_notrace(); system_counter->cycles = cycle; - system_counter->cs = cs; system_counter->cs_id = cs_id; *device_time = timespec64_to_ktime(tspec); diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c index 2782442922cb..617c8d6706d3 100644 --- a/drivers/ptp/ptp_kvm_x86.c +++ b/drivers/ptp/ptp_kvm_x86.c @@ -93,7 +93,6 @@ int kvm_arch_ptp_get_clock(struct timespec64 *ts) } int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, - struct clocksource **cs, enum clocksource_ids *cs_id) { struct pvclock_vcpu_time_info *src; @@ -124,7 +123,6 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, *cycle = __pvclock_read_cycles(src, clock_pair->tsc); } while (pvclock_read_retry(src, version)); - *cs = &kvm_clock; *cs_id = CSID_X86_KVM_CLK; return 0; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 95b3d4d0d7dd..e8c74fa3f455 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -12,13 +12,11 @@ #include struct timespec64; -struct clocksource; int kvm_arch_ptp_init(void); void 
kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs, - enum clocksource_ids *cs_id); + struct timespec64 *tspec, enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3538c5bdf9ee..7e50cbd97f86 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -271,8 +271,6 @@ struct system_device_crosststamp { * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value - * @cs: Clocksource corresponding to system counter value. Timekeeping - * code now evaluates cs_id instead. * @cs_id: Clocksource ID corresponding to system counter value. Used by * timekeeping code to verify comparability of two cycle values. * The default ID, CSID_GENERIC, does not identify a specific @@ -280,7 +278,6 @@ struct system_device_crosststamp { */ struct system_counterval_t { u64 cycles; - struct clocksource *cs; enum clocksource_ids cs_id; }; -- cgit v1.2.3 From e5de34db1e95ef549236f9a2630d396a41c208a2 Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:40 +0100 Subject: s390/dasd: Simplify uid string generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are two variants of the device uid string. One contains the virtual device unit information table (vduit), identifying the device as a virtual device located on a real device in a z/VM environment. The other variant does not contain this additional information.
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 12 +++--------- drivers/s390/block/dasd_eckd.c | 16 ++++------------ 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index c4e36650c426..6297dfe6bc67 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1412,15 +1412,9 @@ dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf) break; } - if (strlen(uid.vduit) > 0) - snprintf(uid_string, sizeof(uid_string), - "%s.%s.%04x.%s.%s", - uid.vendor, uid.serial, uid.ssid, ua_string, - uid.vduit); - else - snprintf(uid_string, sizeof(uid_string), - "%s.%s.%04x.%s", - uid.vendor, uid.serial, uid.ssid, ua_string); + snprintf(uid_string, sizeof(uid_string), "%s.%s.%04x.%s%s%s", + uid.vendor, uid.serial, uid.ssid, ua_string, + uid.vduit[0] ? "." 
: "", uid.vduit); } dasd_put_device(device); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index bd89b032968a..229f23a30c5b 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1072,22 +1072,14 @@ static void dasd_eckd_read_fc_security(struct dasd_device *device) } } -static void dasd_eckd_get_uid_string(struct dasd_conf *conf, - char *print_uid) +static void dasd_eckd_get_uid_string(struct dasd_conf *conf, char *print_uid) { struct dasd_uid uid; create_uid(conf, &uid); - if (strlen(uid.vduit) > 0) - snprintf(print_uid, DASD_UID_STRLEN, - "%s.%s.%04x.%02x.%s", - uid.vendor, uid.serial, uid.ssid, - uid.real_unit_addr, uid.vduit); - else - snprintf(print_uid, DASD_UID_STRLEN, - "%s.%s.%04x.%02x", - uid.vendor, uid.serial, uid.ssid, - uid.real_unit_addr); + snprintf(print_uid, DASD_UID_STRLEN, "%s.%s.%04x.%02x%s%s", + uid.vendor, uid.serial, uid.ssid, uid.real_unit_addr, + uid.vduit[0] ? "." : "", uid.vduit); } static int dasd_eckd_check_cabling(struct dasd_device *device, -- cgit v1.2.3 From 9c386d0f6e04fee1b4161e49f8e9f5756bcab04c Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:41 +0100 Subject: s390/dasd: Use sysfs_emit() over sprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sysfs_emit() should be used in show() functions. There are still a couple of functions that use sprintf(). Replace outstanding occurrences of sprintf() in all show() functions with sysfs_emit(). 
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 6297dfe6bc67..8a01afb5e3ce 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1114,7 +1114,7 @@ dasd_use_diag_show(struct device *dev, struct device_attribute *attr, char *buf) use_diag = (devmap->features & DASD_FEATURE_USEDIAG) != 0; else use_diag = (DASD_FEATURE_DEFAULT & DASD_FEATURE_USEDIAG) != 0; - return sprintf(buf, use_diag ? "1\n" : "0\n"); + return sysfs_emit(buf, use_diag ? "1\n" : "0\n"); } static ssize_t @@ -1163,7 +1163,7 @@ dasd_use_raw_show(struct device *dev, struct device_attribute *attr, char *buf) use_raw = (devmap->features & DASD_FEATURE_USERAW) != 0; else use_raw = (DASD_FEATURE_DEFAULT & DASD_FEATURE_USERAW) != 0; - return sprintf(buf, use_raw ? "1\n" : "0\n"); + return sysfs_emit(buf, use_raw ? 
"1\n" : "0\n"); } static ssize_t @@ -1259,7 +1259,7 @@ dasd_access_show(struct device *dev, struct device_attribute *attr, if (count < 0) return count; - return sprintf(buf, "%d\n", count); + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR(host_access_count, 0444, dasd_access_show, NULL); @@ -1338,19 +1338,19 @@ static ssize_t dasd_alias_show(struct device *dev, device = dasd_device_from_cdev(to_ccwdev(dev)); if (IS_ERR(device)) - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); if (device->discipline && device->discipline->get_uid && !device->discipline->get_uid(device, &uid)) { if (uid.type == UA_BASE_PAV_ALIAS || uid.type == UA_HYPER_PAV_ALIAS) { dasd_put_device(device); - return sprintf(buf, "1\n"); + return sysfs_emit(buf, "1\n"); } } dasd_put_device(device); - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); } static DEVICE_ATTR(alias, 0444, dasd_alias_show, NULL); @@ -1856,7 +1856,7 @@ static ssize_t dasd_pm_show(struct device *dev, device = dasd_device_from_cdev(to_ccwdev(dev)); if (IS_ERR(device)) - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); opm = dasd_path_get_opm(device); nppm = dasd_path_get_nppm(device); @@ -1866,8 +1866,8 @@ static ssize_t dasd_pm_show(struct device *dev, ifccpm = dasd_path_get_ifccpm(device); dasd_put_device(device); - return sprintf(buf, "%02x %02x %02x %02x %02x %02x\n", opm, nppm, - cablepm, cuirpm, hpfpm, ifccpm); + return sysfs_emit(buf, "%02x %02x %02x %02x %02x %02x\n", opm, nppm, + cablepm, cuirpm, hpfpm, ifccpm); } static DEVICE_ATTR(path_masks, 0444, dasd_pm_show, NULL); -- cgit v1.2.3 From 8d7ac904c90be7a1ed1aafa34953c40270c30bd0 Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:42 +0100 Subject: s390/dasd: Remove unnecessary errorstring generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In quite a few cases an errorstring is generated using snprintf() before it's passed to dev_err(). 
This indirection is unnecessary and all information can simply be passed directly to dev_err() instead. The errorstring and ERRORLENGTH definitions are removed entirely. While at it, rephrase the error messages to provide more context where possible. Also, fix a few incorrectly used format specifiers (e.g. %x02 -> %02x) in those messages. Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-4-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 26 ++++++-------------------- drivers/s390/block/dasd_3990_erp.c | 8 ++------ drivers/s390/block/dasd_int.h | 3 --- 3 files changed, 8 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 7327e81352e9..a9aa2db3f4ab 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1301,7 +1301,6 @@ int dasd_term_IO(struct dasd_ccw_req *cqr) { struct dasd_device *device; int retries, rc; - char errorstring[ERRORLENGTH]; /* Check the cqr */ rc = dasd_check_cqr(cqr); @@ -1340,10 +1339,8 @@ int dasd_term_IO(struct dasd_ccw_req *cqr) rc = 0; break; default: - /* internal error 10 - unknown rc*/ - snprintf(errorstring, ERRORLENGTH, "10 %d", rc); - dev_err(&device->cdev->dev, "An error occurred in the " - "DASD device driver, reason=%s\n", errorstring); + dev_err(&device->cdev->dev, + "Unexpected error during request termination %d\n", rc); BUG(); break; } @@ -1362,7 +1359,6 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) { struct dasd_device *device; int rc; - char errorstring[ERRORLENGTH]; /* Check the cqr */ rc = dasd_check_cqr(cqr); @@ -1382,10 +1378,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) return -EPERM; } if (cqr->retries < 0) { - /* internal error 14 - start_IO run out of retries */ - sprintf(errorstring, "14 %p", cqr); - dev_err(&device->cdev->dev, "An error occurred in the DASD " - "device driver, reason=%s\n", errorstring);
+ dev_err(&device->cdev->dev, + "Start I/O ran out of retries %p\n", cqr); cqr->status = DASD_CQR_ERROR; return -EIO; } @@ -1463,11 +1457,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) "not accessible"); break; default: - /* internal error 11 - unknown rc */ - snprintf(errorstring, ERRORLENGTH, "11 %d", rc); dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", errorstring); + "Unexpected error during request start %d", rc); BUG(); break; } @@ -1904,8 +1895,6 @@ static void __dasd_device_process_ccw_queue(struct dasd_device *device, static void __dasd_process_cqr(struct dasd_device *device, struct dasd_ccw_req *cqr) { - char errorstring[ERRORLENGTH]; - switch (cqr->status) { case DASD_CQR_SUCCESS: cqr->status = DASD_CQR_DONE; @@ -1917,11 +1906,8 @@ static void __dasd_process_cqr(struct dasd_device *device, cqr->status = DASD_CQR_TERMINATED; break; default: - /* internal error 12 - wrong cqr status*/ - snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status); dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", errorstring); + "Unexpected CQR status %p %02x", cqr, cqr->status); BUG(); } if (cqr->callback) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 89957bb7244d..0705736acf09 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -398,7 +398,6 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense) struct dasd_device *device = erp->startdev; char msg_format = (sense[7] & 0xF0); char msg_no = (sense[7] & 0x0F); - char errorstring[ERRORLENGTH]; switch (msg_format) { case 0x00: /* Format 0 - Program or System Checks */ @@ -1004,12 +1003,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense) } break; - default: /* unknown message format - should not happen - internal error 03 - unknown message format */ - snprintf(errorstring, ERRORLENGTH, "03 %x02", msg_format); + 
default: dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", errorstring); + "Unknown message format %02x", msg_format); break; } /* end switch message format */ diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1b1b8a41c4d4..71d88e931090 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -113,9 +113,6 @@ do { \ __dev_id.ssid, __dev_id.devno, d_data); \ } while (0) -/* limit size for an errorstring */ -#define ERRORLENGTH 30 - /* definition of dbf debug levels */ #define DBF_EMERG 0 /* system is unusable */ #define DBF_ALERT 1 /* action must be taken immediately */ -- cgit v1.2.3 From 4849494f05994f411c9cc0504843c6adefd1f2de Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:43 +0100 Subject: s390/dasd: Move allocation error message to DBF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All error messages for a failing dasd_smalloc_request() call are logged via DBF, except one. There is no value in logging this particular allocation failure via dev_err(). Move the message to DBF, too, to be in line with the rest.
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-5-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9aa2db3f4ab..79697301fd83 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3963,10 +3963,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, NULL); if (IS_ERR(cqr)) { - /* internal error 13 - Allocating the RDC request failed*/ - dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", "13"); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + "Could not allocate RDC request"); return cqr; } -- cgit v1.2.3 From 32312cf229117b781bd02d93b0b11c8b47dfcc0a Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:44 +0100 Subject: s390/dasd: Remove unused message logging macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macros DEV_MESSAGE, MESSAGE, DEV_MESSAGE_LOG, and MESSAGE_LOG, are not used and there is no history anymore of any usage. Remove them. 
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-6-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_int.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 71d88e931090..a6c5f1fa2d87 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -123,32 +123,6 @@ do { \ #define DBF_INFO 6 /* informational */ #define DBF_DEBUG 6 /* debug-level messages */ -/* messages to be written via klogd and dbf */ -#define DEV_MESSAGE(d_loglevel,d_device,d_string,d_args...)\ -do { \ - printk(d_loglevel PRINTK_HEADER " %s: " d_string "\n", \ - dev_name(&d_device->cdev->dev), d_args); \ - DBF_DEV_EVENT(DBF_ALERT, d_device, d_string, d_args); \ -} while(0) - -#define MESSAGE(d_loglevel,d_string,d_args...)\ -do { \ - printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \ - DBF_EVENT(DBF_ALERT, d_string, d_args); \ -} while(0) - -/* messages to be written via klogd only */ -#define DEV_MESSAGE_LOG(d_loglevel,d_device,d_string,d_args...)\ -do { \ - printk(d_loglevel PRINTK_HEADER " %s: " d_string "\n", \ - dev_name(&d_device->cdev->dev), d_args); \ -} while(0) - -#define MESSAGE_LOG(d_loglevel,d_string,d_args...)\ -do { \ - printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \ -} while(0) - /* Macro to calculate number of blocks per page */ #define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / blksize) -- cgit v1.2.3 From 0b3644b475e2564abe26a916af8447beb7c4966b Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:45 +0100 Subject: s390/dasd: Use dev_err() over printk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To reduce the information required for the string generation in the sense dump functions, use the more concise dev_err() variant over printk(KERN_ERR, 
...) to improve code readability. The dev_err() function provides the component and device name for free and the separate dev_name() calls as well as the PRINTK_HEADER can be dropped. Dropping PRINTK_HEADER removes the "dasd(eckd):" for all lines. Only the first line of a dev_err() call is prefixed with the component and device (e.g. "dasd-eckd 0.0.95d0:"). The format specifier for printed pointers is also changed to unhashed (%px) as this can help with debugging and servicing. Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-7-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 139 ++++++++++++++++------------------------- drivers/s390/block/dasd_fba.c | 55 +++++++--------- 2 files changed, 76 insertions(+), 118 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 229f23a30c5b..d9f776789429 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -37,11 +37,6 @@ #include "dasd_int.h" #include "dasd_eckd.h" -#ifdef PRINTK_HEADER -#undef PRINTK_HEADER -#endif /* PRINTK_HEADER */ -#define PRINTK_HEADER "dasd(eckd):" - /* * raw track access always map to 64k in memory * so it maps to 16 blocks of 4k per track @@ -5521,15 +5516,15 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) * and return number of printed chars. 
*/ static void -dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) +dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from, + struct ccw1 *to, char *page) { int len, count; char *datap; len = 0; while (from <= to) { - len += sprintf(page + len, PRINTK_HEADER - " CCW %p: %08X %08X DAT:", + len += sprintf(page + len, "CCW %px: %08X %08X DAT:", from, ((int *) from)[0], ((int *) from)[1]); /* get pointer to data (consider IDALs) */ @@ -5552,7 +5547,7 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) from++; } if (len > 0) - printk(KERN_ERR "%s", page); + dev_err(&device->cdev->dev, "%s", page); } static void @@ -5583,9 +5578,12 @@ dasd_eckd_dump_sense_dbf(struct dasd_device *device, struct irb *irb, static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, struct dasd_ccw_req *req, struct irb *irb) { - char *page; struct ccw1 *first, *last, *fail, *from, *to; + struct device *dev; int len, sl, sct; + char *page; + + dev = &device->cdev->dev; page = (char *) get_zeroed_page(GFP_ATOMIC); if (page == NULL) { @@ -5594,24 +5592,18 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, return; } /* dump the sense data */ - len = sprintf(page, PRINTK_HEADER - " I/O status report for device %s:\n", - dev_name(&device->cdev->dev)); - len += sprintf(page + len, PRINTK_HEADER - " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X " - "CS:%02X RC:%d\n", + len = sprintf(page, "I/O status report:\n"); + len += sprintf(page + len, + "in req: %px CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X CS:%02X RC:%d\n", req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw), scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw), scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw), req ? 
req->intrc : 0); - len += sprintf(page + len, PRINTK_HEADER - " device %s: Failing CCW: %p\n", - dev_name(&device->cdev->dev), + len += sprintf(page + len, "Failing CCW: %px\n", phys_to_virt(irb->scsw.cmd.cpa)); if (irb->esw.esw0.erw.cons) { for (sl = 0; sl < 4; sl++) { - len += sprintf(page + len, PRINTK_HEADER - " Sense(hex) %2d-%2d:", + len += sprintf(page + len, "Sense(hex) %2d-%2d:", (8 * sl), ((8 * sl) + 7)); for (sct = 0; sct < 8; sct++) { @@ -5623,23 +5615,20 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, if (irb->ecw[27] & DASD_SENSE_BIT_0) { /* 24 Byte Sense Data */ - sprintf(page + len, PRINTK_HEADER - " 24 Byte: %x MSG %x, " - "%s MSGb to SYSOP\n", + sprintf(page + len, + "24 Byte: %x MSG %x, %s MSGb to SYSOP\n", irb->ecw[7] >> 4, irb->ecw[7] & 0x0f, irb->ecw[1] & 0x10 ? "" : "no"); } else { /* 32 Byte Sense Data */ - sprintf(page + len, PRINTK_HEADER - " 32 Byte: Format: %x " - "Exception class %x\n", + sprintf(page + len, + "32 Byte: Format: %x Exception class %x\n", irb->ecw[6] & 0x0f, irb->ecw[22] >> 4); } } else { - sprintf(page + len, PRINTK_HEADER - " SORRY - NO VALID SENSE AVAILABLE\n"); + sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n"); } - printk(KERN_ERR "%s", page); + dev_err(dev, "%s", page); if (req) { /* req == NULL for unsolicited interrupts */ @@ -5648,8 +5637,8 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, first = req->cpaddr; for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); to = min(first + 6, last); - printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req); - dasd_eckd_dump_ccw_range(first, to, page); + dev_err(dev, "Related CP in req: %px\n", req); + dasd_eckd_dump_ccw_range(device, first, to, page); /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ @@ -5657,19 +5646,19 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, fail = phys_to_virt(irb->scsw.cmd.cpa); /* failing CCW */ if (from < fail - 2) { from 
= fail - 2; /* there is a gap - print header */ - printk(KERN_ERR PRINTK_HEADER "......\n"); + dev_err(dev, "......\n"); } to = min(fail + 1, last); - dasd_eckd_dump_ccw_range(from, to, page + len); + dasd_eckd_dump_ccw_range(device, from, to, page + len); /* print last CCWs (maximum 2) */ len = 0; from = max(from, ++to); if (from < last - 1) { from = last - 1; /* there is a gap - print header */ - printk(KERN_ERR PRINTK_HEADER "......\n"); + dev_err(dev, "......\n"); } - dasd_eckd_dump_ccw_range(from, last, page + len); + dasd_eckd_dump_ccw_range(device, from, last, page + len); } free_page((unsigned long) page); } @@ -5693,11 +5682,9 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, return; } /* dump the sense data */ - len = sprintf(page, PRINTK_HEADER - " I/O status report for device %s:\n", - dev_name(&device->cdev->dev)); - len += sprintf(page + len, PRINTK_HEADER - " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X " + len = sprintf(page, "I/O status report:\n"); + len += sprintf(page + len, + "in req: %px CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X " "CS:%02X fcxs:%02X schxs:%02X RC:%d\n", req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw), scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw), @@ -5705,9 +5692,7 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, irb->scsw.tm.fcxs, (irb->scsw.tm.ifob << 7) | irb->scsw.tm.sesq, req ? 
req->intrc : 0); - len += sprintf(page + len, PRINTK_HEADER - " device %s: Failing TCW: %p\n", - dev_name(&device->cdev->dev), + len += sprintf(page + len, "Failing TCW: %px\n", phys_to_virt(irb->scsw.tm.tcw)); tsb = NULL; @@ -5716,47 +5701,37 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw)); if (tsb) { - len += sprintf(page + len, PRINTK_HEADER - " tsb->length %d\n", tsb->length); - len += sprintf(page + len, PRINTK_HEADER - " tsb->flags %x\n", tsb->flags); - len += sprintf(page + len, PRINTK_HEADER - " tsb->dcw_offset %d\n", tsb->dcw_offset); - len += sprintf(page + len, PRINTK_HEADER - " tsb->count %d\n", tsb->count); + len += sprintf(page + len, "tsb->length %d\n", tsb->length); + len += sprintf(page + len, "tsb->flags %x\n", tsb->flags); + len += sprintf(page + len, "tsb->dcw_offset %d\n", tsb->dcw_offset); + len += sprintf(page + len, "tsb->count %d\n", tsb->count); residual = tsb->count - 28; - len += sprintf(page + len, PRINTK_HEADER - " residual %d\n", residual); + len += sprintf(page + len, "residual %d\n", residual); switch (tsb->flags & 0x07) { case 1: /* tsa_iostat */ - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.iostat.dev_time %d\n", + len += sprintf(page + len, "tsb->tsa.iostat.dev_time %d\n", tsb->tsa.iostat.dev_time); - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.iostat.def_time %d\n", + len += sprintf(page + len, "tsb->tsa.iostat.def_time %d\n", tsb->tsa.iostat.def_time); - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.iostat.queue_time %d\n", + len += sprintf(page + len, "tsb->tsa.iostat.queue_time %d\n", tsb->tsa.iostat.queue_time); - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.iostat.dev_busy_time %d\n", + len += sprintf(page + len, "tsb->tsa.iostat.dev_busy_time %d\n", tsb->tsa.iostat.dev_busy_time); - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.iostat.dev_act_time %d\n", + len += sprintf(page + len, 
"tsb->tsa.iostat.dev_act_time %d\n", tsb->tsa.iostat.dev_act_time); sense = tsb->tsa.iostat.sense; break; case 2: /* ts_ddpc */ - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.ddpc.rc %d\n", tsb->tsa.ddpc.rc); + len += sprintf(page + len, "tsb->tsa.ddpc.rc %d\n", + tsb->tsa.ddpc.rc); for (sl = 0; sl < 2; sl++) { - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.ddpc.rcq %2d-%2d: ", + len += sprintf(page + len, + "tsb->tsa.ddpc.rcq %2d-%2d: ", (8 * sl), ((8 * sl) + 7)); rcq = tsb->tsa.ddpc.rcq; for (sct = 0; sct < 8; sct++) { - len += sprintf(page + len, " %02x", + len += sprintf(page + len, "%02x", rcq[8 * sl + sct]); } len += sprintf(page + len, "\n"); @@ -5764,15 +5739,15 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, sense = tsb->tsa.ddpc.sense; break; case 3: /* tsa_intrg */ - len += sprintf(page + len, PRINTK_HEADER - " tsb->tsa.intrg.: not supported yet\n"); + len += sprintf(page + len, + "tsb->tsa.intrg.: not supported yet\n"); break; } if (sense) { for (sl = 0; sl < 4; sl++) { - len += sprintf(page + len, PRINTK_HEADER - " Sense(hex) %2d-%2d:", + len += sprintf(page + len, + "Sense(hex) %2d-%2d:", (8 * sl), ((8 * sl) + 7)); for (sct = 0; sct < 8; sct++) { len += sprintf(page + len, " %02x", @@ -5783,27 +5758,23 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, if (sense[27] & DASD_SENSE_BIT_0) { /* 24 Byte Sense Data */ - sprintf(page + len, PRINTK_HEADER - " 24 Byte: %x MSG %x, " - "%s MSGb to SYSOP\n", + sprintf(page + len, + "24 Byte: %x MSG %x, %s MSGb to SYSOP\n", sense[7] >> 4, sense[7] & 0x0f, sense[1] & 0x10 ? 
"" : "no"); } else { /* 32 Byte Sense Data */ - sprintf(page + len, PRINTK_HEADER - " 32 Byte: Format: %x " - "Exception class %x\n", + sprintf(page + len, + "32 Byte: Format: %x Exception class %x\n", sense[6] & 0x0f, sense[22] >> 4); } } else { - sprintf(page + len, PRINTK_HEADER - " SORRY - NO VALID SENSE AVAILABLE\n"); + sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n"); } } else { - sprintf(page + len, PRINTK_HEADER - " SORRY - NO TSB DATA AVAILABLE\n"); + sprintf(page + len, "SORRY - NO TSB DATA AVAILABLE\n"); } - printk(KERN_ERR "%s", page); + dev_err(&device->cdev->dev, "%s", page); free_page((unsigned long) page); } diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index c06fa2b27120..045e548630df 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -25,11 +25,6 @@ #include "dasd_int.h" #include "dasd_fba.h" -#ifdef PRINTK_HEADER -#undef PRINTK_HEADER -#endif /* PRINTK_HEADER */ -#define PRINTK_HEADER "dasd(fba):" - #define FBA_DEFAULT_RETRIES 32 #define DASD_FBA_CCW_WRITE 0x41 @@ -660,30 +655,27 @@ static void dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, struct irb *irb) { - char *page; struct ccw1 *act, *end, *last; int len, sl, sct, count; + struct device *dev; + char *page; + + dev = &device->cdev->dev; page = (char *) get_zeroed_page(GFP_ATOMIC); if (page == NULL) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", - "No memory to dump sense data"); + "No memory to dump sense data"); return; } - len = sprintf(page, PRINTK_HEADER - " I/O status report for device %s:\n", - dev_name(&device->cdev->dev)); - len += sprintf(page + len, PRINTK_HEADER - " in req: %p CS: 0x%02X DS: 0x%02X\n", req, - irb->scsw.cmd.cstat, irb->scsw.cmd.dstat); - len += sprintf(page + len, PRINTK_HEADER - " device %s: Failing CCW: %p\n", - dev_name(&device->cdev->dev), + len = sprintf(page, "I/O status report:\n"); + len += sprintf(page + len, "in req: %px CS: 0x%02X DS: 0x%02X\n", + req, 
irb->scsw.cmd.cstat, irb->scsw.cmd.dstat); + len += sprintf(page + len, "Failing CCW: %px\n", (void *) (addr_t) irb->scsw.cmd.cpa); if (irb->esw.esw0.erw.cons) { for (sl = 0; sl < 4; sl++) { - len += sprintf(page + len, PRINTK_HEADER - " Sense(hex) %2d-%2d:", + len += sprintf(page + len, "Sense(hex) %2d-%2d:", (8 * sl), ((8 * sl) + 7)); for (sct = 0; sct < 8; sct++) { @@ -693,20 +685,18 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, len += sprintf(page + len, "\n"); } } else { - len += sprintf(page + len, PRINTK_HEADER - " SORRY - NO VALID SENSE AVAILABLE\n"); + len += sprintf(page + len, "SORRY - NO VALID SENSE AVAILABLE\n"); } - printk(KERN_ERR "%s", page); + dev_err(dev, "%s", page); /* dump the Channel Program */ /* print first CCWs (maximum 8) */ act = req->cpaddr; - for (last = act; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); + for (last = act; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); end = min(act + 8, last); - len = sprintf(page, PRINTK_HEADER " Related CP in req: %p\n", req); + len = sprintf(page, "Related CP in req: %px\n", req); while (act <= end) { - len += sprintf(page + len, PRINTK_HEADER - " CCW %p: %08X %08X DAT:", + len += sprintf(page + len, "CCW %px: %08X %08X DAT:", act, ((int *) act)[0], ((int *) act)[1]); for (count = 0; count < 32 && count < act->count; count += sizeof(int)) @@ -716,19 +706,17 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, len += sprintf(page + len, "\n"); act++; } - printk(KERN_ERR "%s", page); - + dev_err(dev, "%s", page); /* print failing CCW area */ len = 0; if (act < ((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa) - 2) { act = ((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa) - 2; - len += sprintf(page + len, PRINTK_HEADER "......\n"); + len += sprintf(page + len, "......\n"); } end = min((struct ccw1 *)(addr_t) irb->scsw.cmd.cpa + 2, last); while (act <= end) { - len += sprintf(page + len, PRINTK_HEADER - " CCW %p: %08X %08X DAT:", + len += 
sprintf(page + len, "CCW %px: %08X %08X DAT:", act, ((int *) act)[0], ((int *) act)[1]); for (count = 0; count < 32 && count < act->count; count += sizeof(int)) @@ -742,11 +730,10 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, /* print last CCWs */ if (act < last - 2) { act = last - 2; - len += sprintf(page + len, PRINTK_HEADER "......\n"); + len += sprintf(page + len, "......\n"); } while (act <= last) { - len += sprintf(page + len, PRINTK_HEADER - " CCW %p: %08X %08X DAT:", + len += sprintf(page + len, "CCW %px: %08X %08X DAT:", act, ((int *) act)[0], ((int *) act)[1]); for (count = 0; count < 32 && count < act->count; count += sizeof(int)) @@ -757,7 +744,7 @@ dasd_fba_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, act++; } if (len > 0) - printk(KERN_ERR "%s", page); + dev_err(dev, "%s", page); free_page((unsigned long) page); } -- cgit v1.2.3 From 4ba6366dbb03c2a58d0e7cbe140942bcf715006c Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:46 +0100 Subject: s390/dasd: Remove %p format specifier from error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Printing pointer in error messages doesn't add any value since the addresses are hashed. Remove the %p format specifier and adapt the error messages slightly. Replace %p with %px in ERP to get the actual addresses since ERP is used for debugging purposes only anyway. 
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-8-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 20 ++++++++------------ drivers/s390/block/dasd_3990_erp.c | 14 +++++--------- drivers/s390/block/dasd_erp.c | 4 ++-- 3 files changed, 15 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 79697301fd83..b3cdf254ce69 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1379,7 +1379,7 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) } if (cqr->retries < 0) { dev_err(&device->cdev->dev, - "Start I/O ran out of retries %p\n", cqr); + "Start I/O ran out of retries\n"); cqr->status = DASD_CQR_ERROR; return -EIO; } @@ -1907,7 +1907,7 @@ static void __dasd_process_cqr(struct dasd_device *device, break; default: dev_err(&device->cdev->dev, - "Unexpected CQR status %p %02x", cqr, cqr->status); + "Unexpected CQR status %02x", cqr->status); BUG(); } if (cqr->callback) @@ -1972,16 +1972,14 @@ static void __dasd_device_check_expire(struct dasd_device *device) if (device->discipline->term_IO(cqr) != 0) { /* Hmpf, try again in 5 sec */ dev_err(&device->cdev->dev, - "cqr %p timed out (%lus) but cannot be " - "ended, retrying in 5 s\n", - cqr, (cqr->expires/HZ)); + "CQR timed out (%lus) but cannot be ended, retrying in 5s\n", + (cqr->expires / HZ)); cqr->expires += 5*HZ; dasd_device_set_timer(device, 5*HZ); } else { dev_err(&device->cdev->dev, - "cqr %p timed out (%lus), %i retries " - "remaining\n", cqr, (cqr->expires/HZ), - cqr->retries); + "CQR timed out (%lus), %i retries remaining\n", + (cqr->expires / HZ), cqr->retries); } __dasd_device_check_autoquiesce_timeout(device, cqr); } @@ -2102,8 +2100,7 @@ int dasd_flush_device_queue(struct dasd_device *device) if (rc) { /* unable to terminate requeust */ dev_err(&device->cdev->dev, - "Flushing the DASD request queue " - 
"failed for request %p\n", cqr); + "Flushing the DASD request queue failed\n"); /* stop flush processing */ goto finished; } @@ -2619,8 +2616,7 @@ static int __dasd_cancel_req(struct dasd_ccw_req *cqr) rc = device->discipline->term_IO(cqr); if (rc) { dev_err(&device->cdev->dev, - "Cancelling request %p failed with rc=%d\n", - cqr, rc); + "Cancelling request failed with rc=%d\n", rc); } else { cqr->stopclk = get_tod_clock(); } diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 0705736acf09..b39c0798a110 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2659,7 +2659,7 @@ dasd_3990_erp_further_erp(struct dasd_ccw_req *erp) * necessary */ dev_err(&device->cdev->dev, - "ERP %p has run out of retries and failed\n", erp); + "ERP %px has run out of retries and failed\n", erp); erp->status = DASD_CQR_FAILED; } @@ -2782,11 +2782,9 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) "ERP chain at BEGINNING of ERP-ACTION\n"); for (temp_erp = cqr; temp_erp != NULL; temp_erp = temp_erp->refers) { - dev_err(&device->cdev->dev, - "ERP %p (%02x) refers to %p\n", - temp_erp, temp_erp->status, - temp_erp->refers); + "ERP %px (%02x) refers to %px\n", + temp_erp, temp_erp->status, temp_erp->refers); } } @@ -2833,11 +2831,9 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) "ERP chain at END of ERP-ACTION\n"); for (temp_erp = erp; temp_erp != NULL; temp_erp = temp_erp->refers) { - dev_err(&device->cdev->dev, - "ERP %p (%02x) refers to %p\n", - temp_erp, temp_erp->status, - temp_erp->refers); + "ERP %px (%02x) refers to %px\n", + temp_erp, temp_erp->status, temp_erp->refers); } } diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index c07e6e713518..c462f01d22e3 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -170,12 +170,12 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb) device = cqr->startdev; if (cqr->intrc == -ETIMEDOUT) { 
dev_err(&device->cdev->dev, - "A timeout error occurred for cqr %p\n", cqr); + "A timeout error occurred for cqr %px\n", cqr); return; } if (cqr->intrc == -ENOLINK) { dev_err(&device->cdev->dev, - "A transport error occurred for cqr %p\n", cqr); + "A transport error occurred for cqr %px\n", cqr); return; } /* dump sense data */ -- cgit v1.2.3 From c6c6c69df6598aacf3921f26f89b12d5e321ea83 Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:47 +0100 Subject: s390/dasd: Remove PRINTK_HEADER and KMSG_COMPONENT definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRINTK_HEADER was mainly used to prefix log messages with the module name. Most components don't use this definition anymore, either because there are no log messages being generated anymore, or because pr_*() were replaced by dev_*(), which contains device and component information already. PRINTK_HEADER is also dropped in the function dasd_3990_erp_handle_match_erp() in dasd_3990_erp.c from a panic() call as panic() already provides all relevant information. KMSG_COMPONENT was mainly used to identify a component in a long gone kernel message catalog feature. Remove both definitions since they're either not used or alternatives make the code slightly shorter and more readable. 
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-9-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 3 --- drivers/s390/block/dasd_3990_erp.c | 7 +------ drivers/s390/block/dasd_alias.c | 8 -------- drivers/s390/block/dasd_devmap.c | 4 ---- drivers/s390/block/dasd_diag.c | 4 ---- drivers/s390/block/dasd_eckd.c | 2 -- drivers/s390/block/dasd_eer.c | 7 ------- drivers/s390/block/dasd_erp.c | 5 ----- drivers/s390/block/dasd_genhd.c | 5 ----- drivers/s390/block/dasd_ioctl.c | 6 ------ drivers/s390/block/dasd_proc.c | 5 ----- 11 files changed, 1 insertion(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index b3cdf254ce69..d79e54279158 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -30,9 +30,6 @@ #include #include -/* This is ugly... */ -#define PRINTK_HEADER "dasd:" - #include "dasd_int.h" /* * SECTION: Constant definitions to be used within this file diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index b39c0798a110..c59a961cfdd2 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -7,13 +7,9 @@ * */ -#define KMSG_COMPONENT "dasd-eckd" - #include #include -#define PRINTK_HEADER "dasd_erp(3990): " - #include "dasd_int.h" #include "dasd_eckd.h" @@ -2700,8 +2696,7 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head, while (erp_done != erp) { if (erp_done == NULL) /* end of chain reached */ - panic(PRINTK_HEADER "Programming error in ERP! The " - "original request was lost\n"); + panic("Programming error in ERP! 
The original request was lost\n"); /* remove the request from the device queue */ list_del(&erp_done->blocklist); diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index c9740ae88d1a..e84cd5436556 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -6,20 +6,12 @@ * Author(s): Stefan Weinhuber */ -#define KMSG_COMPONENT "dasd-eckd" - #include #include #include #include "dasd_int.h" #include "dasd_eckd.h" -#ifdef PRINTK_HEADER -#undef PRINTK_HEADER -#endif /* PRINTK_HEADER */ -#define PRINTK_HEADER "dasd(eckd):" - - /* * General concept of alias management: * - PAV and DASD alias management is specific to the eckd discipline. diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 8a01afb5e3ce..0316c20823ee 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -13,8 +13,6 @@ * */ -#define KMSG_COMPONENT "dasd" - #include #include #include @@ -24,8 +22,6 @@ #include #include -/* This is ugly... 
*/ -#define PRINTK_HEADER "dasd_devmap:" #define DASD_MAX_PARAMS 256 #include "dasd_int.h" diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 2e4e555b37c3..041088c7e909 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -8,8 +8,6 @@ * */ -#define KMSG_COMPONENT "dasd" - #include #include #include @@ -31,8 +29,6 @@ #include "dasd_int.h" #include "dasd_diag.h" -#define PRINTK_HEADER "dasd(diag):" - MODULE_LICENSE("GPL"); /* The maximum number of blocks per request (max_blocks) is dependent on the diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index d9f776789429..8aade17d885c 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -10,8 +10,6 @@ * Author.........: Nigel Hislop */ -#define KMSG_COMPONENT "dasd-eckd" - #include #include #include diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index c956de711cf7..5064a616e041 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -7,8 +7,6 @@ * Author(s): Stefan Weinhuber */ -#define KMSG_COMPONENT "dasd-eckd" - #include #include #include @@ -28,11 +26,6 @@ #include "dasd_int.h" #include "dasd_eckd.h" -#ifdef PRINTK_HEADER -#undef PRINTK_HEADER -#endif /* PRINTK_HEADER */ -#define PRINTK_HEADER "dasd(eer):" - /* * SECTION: the internal buffer */ diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index c462f01d22e3..4c0d3a704513 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -9,8 +9,6 @@ * */ -#define KMSG_COMPONENT "dasd" - #include #include @@ -18,9 +16,6 @@ #include #include -/* This is ugly... 
*/ -#define PRINTK_HEADER "dasd_erp:" - #include "dasd_int.h" struct dasd_ccw_req * diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 55e3abe94cde..30e8ee583e98 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -11,8 +11,6 @@ * */ -#define KMSG_COMPONENT "dasd" - #include #include #include @@ -20,9 +18,6 @@ #include -/* This is ugly... */ -#define PRINTK_HEADER "dasd_gendisk:" - #include "dasd_int.h" static unsigned int queue_depth = 32; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 61b9675e2a67..6814354a23ef 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -10,8 +10,6 @@ * i/o controls for the dasd driver. */ -#define KMSG_COMPONENT "dasd" - #include #include #include @@ -24,12 +22,8 @@ #include #include -/* This is ugly... */ -#define PRINTK_HEADER "dasd_ioctl:" - #include "dasd_int.h" - static int dasd_ioctl_api_version(void __user *argp) { diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 62a859ea67f8..0faaa437d9be 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -11,8 +11,6 @@ * */ -#define KMSG_COMPONENT "dasd" - #include #include #include @@ -23,9 +21,6 @@ #include #include -/* This is ugly... */ -#define PRINTK_HEADER "dasd_proc:" - #include "dasd_int.h" static struct proc_dir_entry *dasd_proc_root_entry = NULL; -- cgit v1.2.3 From 79ae56fc475869d636071f66d9e4ef2a3819eee6 Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Thu, 8 Feb 2024 17:42:48 +0100 Subject: s390/dasd: Use dev_*() for device log messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All log messages in dasd.c use the printk variants of pr_*(). They all add the name of the affected device manually to the log message. 
This can be simplified by using the dev_*() variants of printk, which include the device information and make a separate call to dev_name() unnecessary. The KMSG_COMPONENT and the pr_fmt() definition can be dropped. Note that this removes the "dasd: " prefix from the one pr_info() call in dasd_init(). However, the log message already provides all relevant information. Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240208164248.540985-10-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 50 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d79e54279158..fdb6cb8d8abf 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -8,9 +8,6 @@ * Copyright IBM Corp. 1999, 2009 */ -#define KMSG_COMPONENT "dasd" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include @@ -3381,8 +3378,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) ret = ccw_device_set_online(cdev); if (ret) - pr_warn("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + dev_warn(&cdev->dev, "Setting the DASD online failed with rc=%d\n", ret); } /* @@ -3469,8 +3465,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, { struct dasd_discipline *discipline; struct dasd_device *device; + struct device *dev; int rc; + dev = &cdev->dev; + /* first online clears initial online feature flag */ dasd_set_feature(cdev, DASD_FEATURE_INITIAL_ONLINE, 0); device = dasd_create_device(cdev); @@ -3483,11 +3482,10 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Try to load the required module. 
*/ rc = request_module(DASD_DIAG_MOD); if (rc) { - pr_warn("%s Setting the DASD online failed " - "because the required module %s " - "could not be loaded (rc=%d)\n", - dev_name(&cdev->dev), DASD_DIAG_MOD, - rc); + dev_warn(dev, "Setting the DASD online failed " + "because the required module %s " + "could not be loaded (rc=%d)\n", + DASD_DIAG_MOD, rc); dasd_delete_device(device); return -ENODEV; } @@ -3495,8 +3493,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* Module init could have failed, so check again here after * request_module(). */ if (!dasd_diag_discipline_pointer) { - pr_warn("%s Setting the DASD online failed because of missing DIAG discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of missing DIAG discipline\n"); dasd_delete_device(device); return -ENODEV; } @@ -3517,8 +3514,8 @@ int dasd_generic_set_online(struct ccw_device *cdev, /* check_device will allocate block device if necessary */ rc = discipline->check_device(device); if (rc) { - pr_warn("%s Setting the DASD online with discipline %s failed with rc=%i\n", - dev_name(&cdev->dev), discipline->name, rc); + dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", + discipline->name, rc); module_put(discipline->owner); module_put(base_discipline->owner); dasd_delete_device(device); @@ -3527,16 +3524,15 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_set_target_state(device, DASD_STATE_ONLINE); if (device->state <= DASD_STATE_KNOWN) { - pr_warn("%s Setting the DASD online failed because of a missing discipline\n", - dev_name(&cdev->dev)); + dev_warn(dev, "Setting the DASD online failed because of a missing discipline\n"); rc = -ENODEV; dasd_set_target_state(device, DASD_STATE_NEW); if (device->block) dasd_free_block(device->block); dasd_delete_device(device); - } else - pr_debug("dasd_generic device %s found\n", - dev_name(&cdev->dev)); + } else { + dev_dbg(dev, "dasd_generic device found\n"); + } 
wait_event(dasd_init_waitq, _wait_for_device(device)); @@ -3547,10 +3543,13 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online); int dasd_generic_set_offline(struct ccw_device *cdev) { + int max_count, open_count, rc; struct dasd_device *device; struct dasd_block *block; - int max_count, open_count, rc; unsigned long flags; + struct device *dev; + + dev = &cdev->dev; rc = 0; spin_lock_irqsave(get_ccwdev_lock(cdev), flags); @@ -3571,11 +3570,10 @@ int dasd_generic_set_offline(struct ccw_device *cdev) open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) - pr_warn("%s: The DASD cannot be set offline with open count %i\n", - dev_name(&cdev->dev), open_count); + dev_warn(dev, "The DASD cannot be set offline with open count %i\n", + open_count); else - pr_warn("%s: The DASD cannot be set offline while it is in use\n", - dev_name(&cdev->dev)); + dev_warn(dev, "The DASD cannot be set offline while it is in use\n"); rc = -EBUSY; goto out_err; } @@ -3935,8 +3933,8 @@ static int dasd_handle_autoquiesce(struct dasd_device *device, if (dasd_eer_enabled(device)) dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); - pr_info("%s: The DASD has been put in the quiesce state\n", - dev_name(&device->cdev->dev)); + dev_info(&device->cdev->dev, + "The DASD has been put in the quiesce state\n"); dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); if (device->features & DASD_FEATURE_REQUEUEQUIESCE) -- cgit v1.2.3 From 14509b748ff58df3f0980b1cd70ade0e4a805e99 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 30 Jan 2024 13:21:34 +0900 Subject: null_blk: add configfs variable shared_tags Allow setting shared_tags through configfs, which previously could only be set as a module parameter. For that purpose, delay tag_set initialization from null_init() to null_add_dev(). Use tag_set.ops as the flag to check whether tag_set is initialized or not. 
The following parameters can not be set through configfs yet: timeout requeue init_hctx Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240130042134.2463659-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 38 ++++++++++++++++++++------------------ drivers/block/null_blk/null_blk.h | 1 + 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 36755f263e8e..4281371c81fe 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -165,8 +165,8 @@ static bool g_blocking; module_param_named(blocking, g_blocking, bool, 0444); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); -static bool shared_tags; -module_param(shared_tags, bool, 0444); +static bool g_shared_tags; +module_param_named(shared_tags, g_shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); static bool g_shared_tag_bitmap; @@ -426,6 +426,7 @@ NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); +NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) @@ -571,6 +572,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, &nullb_device_attr_no_sched, + &nullb_device_attr_shared_tags, &nullb_device_attr_shared_tag_bitmap, NULL, }; @@ -653,10 +655,11 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page) "badblocks,blocking,blocksize,cache_size," "completion_nsec,discard,home_node,hw_queue_depth," "irqmode,max_sectors,mbps,memory_backed,no_sched," - 
"poll_queues,power,queue_mode,shared_tag_bitmap,size," - "submit_queues,use_per_node_hctx,virt_boundary,zoned," - "zone_capacity,zone_max_active,zone_max_open," - "zone_nr_conv,zone_offline,zone_readonly,zone_size\n"); + "poll_queues,power,queue_mode,shared_tag_bitmap," + "shared_tags,size,submit_queues,use_per_node_hctx," + "virt_boundary,zoned,zone_capacity,zone_max_active," + "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," + "zone_size\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -738,6 +741,7 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_max_active = g_zone_max_active; dev->virt_boundary = g_virt_boundary; dev->no_sched = g_no_sched; + dev->shared_tags = g_shared_tags; dev->shared_tag_bitmap = g_shared_tag_bitmap; return dev; } @@ -2124,7 +2128,14 @@ static int null_add_dev(struct nullb_device *dev) goto out_free_nullb; if (dev->queue_mode == NULL_Q_MQ) { - if (shared_tags) { + if (dev->shared_tags) { + if (!tag_set.ops) { + rv = null_init_tag_set(NULL, &tag_set); + if (rv) { + tag_set.ops = NULL; + goto out_cleanup_queues; + } + } nullb->tag_set = &tag_set; rv = 0; } else { @@ -2311,18 +2322,12 @@ static int __init null_init(void) g_submit_queues = 1; } - if (g_queue_mode == NULL_Q_MQ && shared_tags) { - ret = null_init_tag_set(NULL, &tag_set); - if (ret) - return ret; - } - config_group_init(&nullb_subsys.su_group); mutex_init(&nullb_subsys.su_mutex); ret = configfs_register_subsystem(&nullb_subsys); if (ret) - goto err_tagset; + return ret; mutex_init(&lock); @@ -2349,9 +2354,6 @@ err_dev: unregister_blkdev(null_major, "nullb"); err_conf: configfs_unregister_subsystem(&nullb_subsys); -err_tagset: - if (g_queue_mode == NULL_Q_MQ && shared_tags) - blk_mq_free_tag_set(&tag_set); return ret; } @@ -2370,7 +2372,7 @@ static void __exit null_exit(void) } mutex_unlock(&lock); - if (g_queue_mode == NULL_Q_MQ && shared_tags) + if (tag_set.ops) blk_mq_free_tag_set(&tag_set); } diff --git a/drivers/block/null_blk/null_blk.h 
b/drivers/block/null_blk/null_blk.h index 929f659dd255..7bcfc0922ae8 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -119,6 +119,7 @@ struct nullb_device { bool zoned; /* if device is zoned */ bool virt_boundary; /* virtual boundary on/off for the device */ bool no_sched; /* no IO scheduler for the device */ + bool shared_tags; /* share tag set between devices for blk-mq */ bool shared_tag_bitmap; /* use hostwide shared tags */ }; -- cgit v1.2.3 From 1df0f512faa71f1e106f36529ceff52f48209e30 Mon Sep 17 00:00:00 2001 From: Jan Höppner Date: Fri, 9 Feb 2024 13:45:21 +0100 Subject: s390/dasd: Improve ERP error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some ERP errors still share the same message format and only add different reason codes to it. These reason codes don't have any meaning anymore. Make the individual error messages more explicit and remove the reason codes altogether. Comments around the error messages are also removed as they provide no additional value anymore with more explicit messages. 
Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240209124522.3697827-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_3990_erp.c | 51 ++++++++++---------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index c59a961cfdd2..459b7f8ac883 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1048,11 +1048,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense) set_bit(DASD_CQR_SUPPRESS_CR, &erp->refers->flags); erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); } else { - /* fatal error - set status to FAILED - internal error 09 - Command Reject */ if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags)) dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, reason=09\n"); + "An I/O command request was rejected\n"); erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); } @@ -1120,13 +1118,7 @@ dasd_3990_erp_equip_check(struct dasd_ccw_req * erp, char *sense) erp->function = dasd_3990_erp_equip_check; if (sense[1] & SNS1_WRITE_INHIBITED) { - dev_info(&device->cdev->dev, - "Write inhibited path encountered\n"); - - /* vary path offline - internal error 04 - Path should be varied off-line.*/ - dev_err(&device->cdev->dev, "An error occurred in the DASD " - "device driver, reason=%s\n", "04"); + dev_err(&device->cdev->dev, "Write inhibited path encountered\n"); erp = dasd_3990_erp_action_1(erp); @@ -1277,11 +1269,7 @@ dasd_3990_erp_inv_format(struct dasd_ccw_req * erp, char *sense) erp = dasd_3990_erp_action_4(erp, sense); } else { - /* internal error 06 - The track format is not valid*/ - dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", "06"); - + dev_err(&device->cdev->dev, "Track format is not valid\n"); erp = dasd_3990_erp_cleanup(erp, 
DASD_CQR_FAILED); } @@ -1655,9 +1643,8 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) sizeof(struct LO_eckd_data), device); if (IS_ERR(erp)) { - /* internal error 01 - Unable to allocate ERP */ - dev_err(&device->cdev->dev, "An error occurred in the DASD " - "device driver, reason=%s\n", "01"); + DBF_DEV_EVENT(DBF_ERR, device, "%s", + "Unable to allocate ERP request (1B 32)"); return dasd_3990_erp_cleanup(default_erp, DASD_CQR_FAILED); } @@ -1799,10 +1786,8 @@ dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense) cpa = previous_erp->irb.scsw.cmd.cpa; if (cpa == 0) { - /* internal error 02 - - Unable to determine address of the CCW to be restarted */ - dev_err(&device->cdev->dev, "An error occurred in the DASD " - "device driver, reason=%s\n", "02"); + dev_err(&device->cdev->dev, + "Unable to determine address of to be restarted CCW\n"); previous_erp->status = DASD_CQR_FAILED; @@ -2001,15 +1986,9 @@ dasd_3990_erp_compound_config(struct dasd_ccw_req * erp, char *sense) { if ((sense[25] & DASD_SENSE_BIT_1) && (sense[26] & DASD_SENSE_BIT_2)) { - - /* set to suspended duplex state then restart - internal error 05 - Set device to suspended duplex state - should be done */ struct dasd_device *device = erp->startdev; dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", "05"); - + "Compound configuration error occurred\n"); } erp->function = dasd_3990_erp_compound_config; @@ -2145,10 +2124,9 @@ dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense) erp = dasd_3990_erp_int_req(erp); break; - case 0x0F: /* length mismatch during update write command - internal error 08 - update write command error*/ - dev_err(&device->cdev->dev, "An error occurred in the " - "DASD device driver, reason=%s\n", "08"); + case 0x0F: + dev_err(&device->cdev->dev, + "Update write command error occurred\n"); erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); break; @@ -2157,12 +2135,9 @@ 
dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense) erp = dasd_3990_erp_action_10_32(erp, sense); break; - case 0x15: /* next track outside defined extend - internal error 07 - The next track is not - within the defined storage extent */ + case 0x15: dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", "07"); + "Track outside defined extent error occurred\n"); erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); break; -- cgit v1.2.3 From c3116e62ddeff79cae342147753ce596f01fcf06 Mon Sep 17 00:00:00 2001 From: Miroslav Franc Date: Fri, 9 Feb 2024 13:45:22 +0100 Subject: s390/dasd: fix double module refcount decrement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once the discipline is associated with the device, deleting the device takes care of decrementing the module's refcount. Doing it manually on this error path causes refcount to artificially decrease on each error while it should just stay the same. 
Fixes: c020d722b110 ("s390/dasd: fix panic during offline processing") Signed-off-by: Miroslav Franc Signed-off-by: Jan Höppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20240209124522.3697827-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fdb6cb8d8abf..2f3adf5d8fee 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3503,12 +3503,11 @@ int dasd_generic_set_online(struct ccw_device *cdev, dasd_delete_device(device); return -EINVAL; } + device->base_discipline = base_discipline; if (!try_module_get(discipline->owner)) { - module_put(base_discipline->owner); dasd_delete_device(device); return -EINVAL; } - device->base_discipline = base_discipline; device->discipline = discipline; /* check_device will allocate block device if necessary */ @@ -3516,8 +3515,6 @@ int dasd_generic_set_online(struct ccw_device *cdev, if (rc) { dev_warn(dev, "Setting the DASD online with discipline %s failed with rc=%i\n", discipline->name, rc); - module_put(discipline->owner); - module_put(base_discipline->owner); dasd_delete_device(device); return rc; } -- cgit v1.2.3 From 30d5297862410418bb8f8b4c0a87fa55c3063dd7 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 4 Feb 2024 18:30:43 +0100 Subject: power: supply: mm8013: select REGMAP_I2C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver uses regmap APIs so it should make sure they are available. 
Fixes: c75f4bf6800b ("power: supply: Introduce MM8013 fuel gauge driver") Cc: Signed-off-by: Thomas Weißschuh Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240204-mm8013-regmap-v1-1-7cc6b619b7d3@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index f21cb05815ec..3e31375491d5 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2 config FUEL_GAUGE_MM8013 tristate "Mitsumi MM8013 fuel gauge driver" depends on I2C + select REGMAP_I2C help Say Y here to enable the Mitsumi MM8013 fuel gauge driver. It enables the monitoring of many battery parameters, including -- cgit v1.2.3 From 218082010aceb40b5495ebc30028ede6e30ee755 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Sun, 28 Jan 2024 23:52:17 -0800 Subject: dm: dm-zoned: guard blkdev_zone_mgmt with noio scope Guard the calls to blkdev_zone_mgmt() with a memalloc_noio scope. 
This helps us get rid of the GFP_NOIO argument to blkdev_zone_mgmt(). Signed-off-by: Johannes Thumshirn Reviewed-by: Mike Snitzer Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240128-zonefs_nofs-v3-2-ae3b7c8def61@wdc.com Signed-off-by: Jens Axboe --- drivers/md/dm-zoned-metadata.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index fdfe30f7b697..165996cc966c 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1655,10 +1655,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { struct dmz_dev *dev = zone->dev; + unsigned int noio_flag; + noio_flag = memalloc_noio_save(); ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), - zmd->zone_nr_sectors, GFP_NOIO); + zmd->zone_nr_sectors, GFP_KERNEL); + memalloc_noio_restore(noio_flag); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", zone->id, ret); -- cgit v1.2.3 From 71f4ecdbb42addf82b01b734b122a02707fed521 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Sun, 28 Jan 2024 23:52:20 -0800 Subject: block: remove gfp_flags from blkdev_zone_mgmt Now that all callers pass in GFP_KERNEL to blkdev_zone_mgmt() and use memalloc_no{io,fs}_{save,restore}() to define the allocation scope, we can drop the gfp_mask parameter from blkdev_zone_mgmt() as well as blkdev_zone_reset_all() and blkdev_zone_reset_all_emulated().
Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20240128-zonefs_nofs-v3-5-ae3b7c8def61@wdc.com Signed-off-by: Jens Axboe --- block/blk-zoned.c | 19 ++++++++----------- drivers/md/dm-zoned-metadata.c | 2 +- drivers/nvme/target/zns.c | 5 ++--- fs/btrfs/zoned.c | 14 +++++--------- fs/f2fs/segment.c | 4 ++-- fs/zonefs/super.c | 2 +- include/linux/blkdev.h | 2 +- 7 files changed, 20 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index d343e5756a9c..d4f4f8325eff 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -177,8 +177,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, } } -static int blkdev_zone_reset_all_emulated(struct block_device *bdev, - gfp_t gfp_mask) +static int blkdev_zone_reset_all_emulated(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; sector_t capacity = bdev_nr_sectors(bdev); @@ -205,7 +204,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev, } bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC, - gfp_mask); + GFP_KERNEL); bio->bi_iter.bi_sector = sector; sector += zone_sectors; @@ -223,7 +222,7 @@ out_free_need_reset: return ret; } -static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) +static int blkdev_zone_reset_all(struct block_device *bdev) { struct bio bio; @@ -238,7 +237,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) * @sector: Start sector of the first zone to operate on * @nr_sectors: Number of sectors, should be at least the length of one zone and * must be zone size aligned. - * @gfp_mask: Memory allocation flags (for bio_alloc) * * Description: * Perform the specified operation on the range of zones specified by @@ -248,7 +246,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) * or finish request. 
*/ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sector, sector_t nr_sectors, gfp_t gfp_mask) + sector_t sector, sector_t nr_sectors) { struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors = bdev_zone_sectors(bdev); @@ -285,12 +283,12 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, */ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) { if (!blk_queue_zone_resetall(q)) - return blkdev_zone_reset_all_emulated(bdev, gfp_mask); - return blkdev_zone_reset_all(bdev, gfp_mask); + return blkdev_zone_reset_all_emulated(bdev); + return blkdev_zone_reset_all(bdev); } while (sector < end_sector) { - bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask); + bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL); bio->bi_iter.bi_sector = sector; sector += zone_sectors; @@ -419,8 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, return -ENOTTY; } - ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: if (cmd == BLKRESETZONE) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 165996cc966c..8156881a31de 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1660,7 +1660,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) noio_flag = memalloc_noio_save(); ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), - zmd->zone_nr_sectors, GFP_KERNEL); + zmd->zone_nr_sectors); memalloc_noio_restore(noio_flag); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 5b5c1e481722..3148d9f1bde6 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -456,8 +456,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req) switch 
(zsa_req_op(req->cmd->zms.zsa)) { case REQ_OP_ZONE_RESET: ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0, - get_capacity(req->ns->bdev->bd_disk), - GFP_KERNEL); + get_capacity(req->ns->bdev->bd_disk)); if (ret < 0) return blkdev_zone_mgmt_errno_to_nvme_status(ret); break; @@ -508,7 +507,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w) goto out; } - ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL); + ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors); if (ret < 0) status = blkdev_zone_mgmt_errno_to_nvme_status(ret); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 05640d61e435..cf2e779d8ef4 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -830,8 +830,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - reset->start, reset->len, - GFP_KERNEL); + reset->start, reset->len); memalloc_nofs_restore(nofs_flags); if (ret) return ret; @@ -984,7 +983,7 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, zone->start, - zone->len, GFP_KERNEL); + zone->len); memalloc_nofs_restore(nofs_flags); if (ret) return ret; @@ -1023,8 +1022,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, zone_start_sector(sb_zone, bdev), - zone_sectors * BTRFS_NR_SB_LOG_ZONES, - GFP_KERNEL); + zone_sectors * BTRFS_NR_SB_LOG_ZONES); memalloc_nofs_restore(nofs_flags); return ret; } @@ -1143,8 +1141,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, *bytes = 0; nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, - physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT, - GFP_KERNEL); + physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT); 
memalloc_nofs_restore(nofs_flags); if (ret) return ret; @@ -2258,8 +2255,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, physical >> SECTOR_SHIFT, - zinfo->zone_size >> SECTOR_SHIFT, - GFP_KERNEL); + zinfo->zone_size >> SECTOR_SHIFT); memalloc_nofs_restore(nofs_flags); if (ret) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0094fe491364..e1065ba70207 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1977,7 +1977,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, trace_f2fs_issue_reset_zone(bdev, blkstart); nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sector, nr_sects, GFP_KERNEL); + sector, nr_sects); memalloc_nofs_restore(nofs_flags); return ret; } @@ -4921,7 +4921,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, - zone->start, zone->len, GFP_KERNEL); + zone->start, zone->len); memalloc_nofs_restore(nofs_flags); if (ret == -EOPNOTSUPP) { ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 63fbac018c04..cadb1364f951 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -113,7 +113,7 @@ static int zonefs_zone_mgmt(struct super_block *sb, trace_zonefs_zone_mgmt(sb, z, op); ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, - z->z_size >> SECTOR_SHIFT, GFP_KERNEL); + z->z_size >> SECTOR_SHIFT); if (ret) { zonefs_err(sb, "Zone management operation %s at %llu failed %d\n", diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d7cac3de65b3..fac580976e3a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -325,7 +325,7 @@ void disk_set_zoned(struct gendisk *disk); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void 
*data); int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); + sector_t sectors, sector_t nr_sectors); int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)); -- cgit v1.2.3 From 921e81db524d17db683cc29aed7ff02f06ea3f96 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Thu, 1 Feb 2024 18:31:26 +0530 Subject: nvme: allow integrity when PI is not in first bytes NVM command set 1.0 (or later) mandates PI to be in the last bytes of metadata. But this was not supported in the block-layer, and driver registered a nop profile. Since block-integrity can now handle flexible PI offset, change the driver to support this configuration. Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240201130126.211402-4-joshi.k@samsung.com Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 +++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d124a8ca9c3..6e7f9b13fba2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1771,6 +1771,7 @@ static void nvme_init_integrity(struct gendisk *disk, } integrity.tuple_size = head->ms; + integrity.pi_offset = head->pi_offset; blk_integrity_register(disk, &integrity); blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } @@ -1880,11 +1881,16 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, free_data: kfree(nvm); set_pi: - if (head->pi_size && (first || head->ms == head->pi_size)) + if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else head->pi_type = 0; + if (first) + head->pi_offset = 0; + else + head->pi_offset = head->ms - head->pi_size; + return ret; } diff --git a/drivers/nvme/host/nvme.h 
b/drivers/nvme/host/nvme.h index 3897334e3950..4a484fc8a073 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -463,6 +463,7 @@ struct nvme_ns_head { u16 ms; u16 pi_size; u8 pi_type; + u8 pi_offset; u8 guard_type; u16 sgs; u32 sws; -- cgit v1.2.3 From 6cf350658736681b9d6b0b6e58c5c76b235bb4c4 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Thu, 8 Feb 2024 16:55:56 +0800 Subject: md: fix kmemleak of rdev->serial If kobject_add() fails in bind_rdev_to_array(), the already allocated 'rdev->serial' is not freed, and kmemleak reports a leak. unreferenced object 0xffff88815a350000 (size 49152): comm "mdadm", pid 789, jiffies 4294716910 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc f773277a): [<0000000058b0a453>] kmemleak_alloc+0x61/0xe0 [<00000000366adf14>] __kmalloc_large_node+0x15e/0x270 [<000000002e82961b>] __kmalloc_node.cold+0x11/0x7f [<00000000f206d60a>] kvmalloc_node+0x74/0x150 [<0000000034bf3363>] rdev_init_serial+0x67/0x170 [<0000000010e08fe9>] mddev_create_serial_pool+0x62/0x220 [<00000000c3837bf0>] bind_rdev_to_array+0x2af/0x630 [<0000000073c28560>] md_add_new_disk+0x400/0x9f0 [<00000000770e30ff>] md_ioctl+0x15bf/0x1c10 [<000000006cfab718>] blkdev_ioctl+0x191/0x3f0 [<0000000085086a11>] vfs_ioctl+0x22/0x60 [<0000000018b656fe>] __x64_sys_ioctl+0xba/0xe0 [<00000000e54e675e>] do_syscall_64+0x71/0x150 [<000000008b0ad622>] entry_SYSCALL_64_after_hwframe+0x6c/0x74 Fixes: 963c555e75b0 ("md: introduce mddev_create/destroy_wb_pool for the change of member device") Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240208085556.2412922-1-linan666@huaweicloud.com --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 45bb387d69c4..e2a5f513dbb7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2562,6 +2562,7 @@ static int
bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) fail: pr_warn("md: failed to register dev-%s for %s\n", b, mdname(mddev)); + mddev_destroy_serial_pool(mddev, rdev); return err; } -- cgit v1.2.3 From cb4443f26b43efa54494b8de8a50457febb06940 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Wed, 7 Feb 2024 11:46:04 +0100 Subject: pinctrl: stm32: fix PM support for stm32mp257 The driver for stm32mp257 is missing the suspend callback in struct dev_pm_ops. Add the callback, using the common stm32_pinctrl_suspend() function. Signed-off-by: Antonio Borneo Fixes: 619f8ca4a73d ("pinctrl: stm32: add stm32mp257 pinctrl support") Link: https://lore.kernel.org/r/20240207104604.174843-1-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32mp257.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c index 73f091cd827e..23aebd4695e9 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32mp257.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32mp257.c @@ -2562,7 +2562,7 @@ static const struct of_device_id stm32mp257_pctrl_match[] = { }; static const struct dev_pm_ops stm32_pinctrl_dev_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, stm32_pinctrl_resume) + SET_LATE_SYSTEM_SLEEP_PM_OPS(stm32_pinctrl_suspend, stm32_pinctrl_resume) }; static struct platform_driver stm32mp257_pinctrl_driver = { -- cgit v1.2.3 From 3eece72ded7f67776731709702f3d1b9893b6a4f Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 30 Jan 2024 16:27:21 +0800 Subject: irqchip/loongson-eiointc: Skip handling if there is no pending interrupt eiointc_irq_dispatch() iterates over the pending bit registers of the interrupt controller and evaluates the result even if there is no interrupt pending in a particular 64bit chunk. Skip handling and especially the pointless write back for clearing the non-pending bits if a chunk is empty. 
[ tglx: Massaged changelog ] Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Acked-by: Huacai Chen Link: https://lore.kernel.org/r/20240130082722.2912576-3-maobibo@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 1623cd779175..fad22e21e1c8 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -198,6 +198,12 @@ static void eiointc_irq_dispatch(struct irq_desc *desc) for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) { pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3)); + + /* Skip handling if pending bitmap is zero */ + if (!pending) + continue; + + /* Clear the IRQs */ iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3)); while (pending) { int bit = __ffs(pending); -- cgit v1.2.3 From 83c0708719f77018cd3b98b0011c9526a3e0e2ca Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 30 Jan 2024 16:27:22 +0800 Subject: irqchip/loongson-eiointc: Remove explicit interrupt affinity restore on resume During suspend all CPUs except CPU0 are hot-unplugged and all active interrupts are migrated to CPU0. On resume eiointc_router_init() affines all interrupts to CPU0, so the subsequent explicit interrupt affinity restore is redundant. Remove it.
[ tglx: Rewrote changelog ] Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240130082722.2912576-4-maobibo@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index fad22e21e1c8..405f622a26ad 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -310,23 +310,7 @@ static int eiointc_suspend(void) static void eiointc_resume(void) { - int i, j; - struct irq_desc *desc; - struct irq_data *irq_data; - eiointc_router_init(0); - - for (i = 0; i < nr_pics; i++) { - for (j = 0; j < eiointc_priv[0]->vec_count; j++) { - desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j); - if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) { - raw_spin_lock(&desc->lock); - irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc)); - eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0); - raw_spin_unlock(&desc->lock); - } - } - } } static struct syscore_ops eiointc_syscore_ops = { -- cgit v1.2.3 From 004c7a6bf43edbd4b092fb6ebba8991d56bc3428 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 9 Feb 2024 19:16:00 +0100 Subject: irqchip/bcm-6345-l1: Prefer struct_size() over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows. The cpu variable is a pointer to "struct bcm6345_l1_cpu" and this structure ends in a flexible array: struct bcm6345_l1_cpu { [...] u32 enable_cache[]; }; The preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. This way, the code is more readable and safer. Signed-off-by: Erick Archer Signed-off-by: Thomas Gleixner Reviewed-by: Gustavo A. R.
Silva Reviewed-by: Kees Cook Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20240209181600.9472-1-erick.archer@gmx.com Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/162 [2] --- drivers/irqchip/irq-bcm6345-l1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c index 9745a119d0e6..eb02d203c963 100644 --- a/drivers/irqchip/irq-bcm6345-l1.c +++ b/drivers/irqchip/irq-bcm6345-l1.c @@ -242,7 +242,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn, else if (intc->n_words != n_words) return -EINVAL; - cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32), + cpu = intc->cpus[idx] = kzalloc(struct_size(cpu, enable_cache, n_words), GFP_KERNEL); if (!cpu) return -ENOMEM; -- cgit v1.2.3 From e955a71f83598a347eb45af5576e7eb6cb5bf285 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 9 Feb 2024 19:31:28 +0100 Subject: irqchip/irq-bcm7038-l1: Prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows. The cpu variable is a pointer to "struct bcm7038_l1_cpu" and this structure ends in a flexible array: struct bcm7038_l1_cpu { void __iomem *map_base; u32 mask_cache[]; }; The preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. This way, the code is more readable and safer. Signed-off-by: Erick Archer Signed-off-by: Thomas Gleixner Reviewed-by: Florian Fainelli Reviewed-by: Kees Cook Reviewed-by: Gustavo A. R.
Silva Link: https://lore.kernel.org/r/20240209183128.10273-1-erick.archer@gmx.com Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/162 [2] --- drivers/irqchip/irq-bcm7038-l1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 24ca1d656adc..36e71af054e9 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -249,7 +249,7 @@ static int __init bcm7038_l1_init_one(struct device_node *dn, return -EINVAL; } - cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32), + cpu = intc->cpus[idx] = kzalloc(struct_size(cpu, mask_cache, n_words), GFP_KERNEL); if (!cpu) return -ENOMEM; -- cgit v1.2.3 From ee4c1592b7e9a5bf89b962d7afd7e9b04c8d16ee Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jan 2024 09:52:39 +0100 Subject: irqchip/gic-v3-its: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be used instead of the deprecated ida_simple_get() and ida_simple_remove(). The upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_max() is inclusive. Adjust the code accordingly. 
Signed-off-by: Christophe JAILLET Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/3b472b0e7edf6e483b8b255cf8d1cb0163532adf.1705222332.git.christophe.jaillet@wanadoo.fr --- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d097001c1e3e..cd950f435cf0 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4419,12 +4419,12 @@ static const struct irq_domain_ops its_sgi_domain_ops = { static int its_vpe_id_alloc(void) { - return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL); + return ida_alloc_max(&its_vpeid_ida, ITS_MAX_VPEID - 1, GFP_KERNEL); } static void its_vpe_id_free(u16 id) { - ida_simple_remove(&its_vpeid_ida, id); + ida_free(&its_vpeid_ida, id); } static int its_vpe_init(struct its_vpe *vpe) -- cgit v1.2.3 From eb5555d422d0fc325e1574a7353d3c616f82d8b5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 25 Jan 2024 19:17:56 +0000 Subject: pmdomain: arm: Fix NULL dereference on scmi_perf_domain removal Unloading the scmi_perf_domain module produced the splat below when the '#power-domain-cells' property was missing from the DT provided to the system under test. Indeed, this particular setup causes the probe to bail out early without giving any error, which leads to the ->remove() callback being run too, but without all the expected initialized structures in place. Add a check and bail out early on remove too.
Call trace: scmi_perf_domain_remove+0x28/0x70 [scmi_perf_domain] scmi_dev_remove+0x28/0x40 [scmi_core] device_remove+0x54/0x90 device_release_driver_internal+0x1dc/0x240 driver_detach+0x58/0xa8 bus_remove_driver+0x78/0x108 driver_unregister+0x38/0x70 scmi_driver_unregister+0x28/0x180 [scmi_core] scmi_perf_domain_driver_exit+0x18/0xb78 [scmi_perf_domain] __arm64_sys_delete_module+0x1a8/0x2c0 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0xb8 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 Code: a90153f3 f9403c14 f9414800 955f8a05 (b9400a80) ---[ end trace 0000000000000000 ]--- Fixes: 2af23ceb8624 ("pmdomain: arm: Add the SCMI performance domain") Signed-off-by: Cristian Marussi Reviewed-by: Sudeep Holla Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240125191756.868860-1-cristian.marussi@arm.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/arm/scmi_perf_domain.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c index 709bbc448fad..d7ef46ccd9b8 100644 --- a/drivers/pmdomain/arm/scmi_perf_domain.c +++ b/drivers/pmdomain/arm/scmi_perf_domain.c @@ -159,6 +159,9 @@ static void scmi_perf_domain_remove(struct scmi_device *sdev) struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev); int i; + if (!scmi_pd_data) + return; + of_genpd_del_provider(dev->of_node); for (i = 0; i < scmi_pd_data->num_domains; i++) -- cgit v1.2.3 From fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:24:40 +0100 Subject: efi/capsule-loader: fix incorrect allocation size gcc-14 notices that the allocation with sizeof(void *) on 32-bit architectures is not enough for a 64-bit phys_addr_t: drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open': drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for
type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size] 295 | cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); | ^ Use the correct type instead here. Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping") Signed-off-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/capsule-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a814..97bafb5f7038 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); -- cgit v1.2.3 From e5d40e9afd84cec01cdbbbfe62d52f89959ab3ee Mon Sep 17 00:00:00 2001 From: Naresh Solanki Date: Tue, 13 Feb 2024 20:28:00 +0530 Subject: regulator: max5970: Fix regulator child node name Update regulator child node name to lower case, i.e., sw0 & sw1 as described in max5970 dt binding.
Signed-off-by: Naresh Solanki Link: https://msgid.link/r/20240213145801.2564518-1-naresh.solanki@9elements.com Signed-off-by: Mark Brown --- drivers/regulator/max5970-regulator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index 830a1c4cd705..8bbcd983a74a 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -29,8 +29,8 @@ struct max5970_regulator { }; enum max597x_regulator_id { - MAX597X_SW0, - MAX597X_SW1, + MAX597X_sw0, + MAX597X_sw1, }; static int max5970_read_adc(struct regmap *regmap, int reg, long *val) @@ -378,8 +378,8 @@ static int max597x_dt_parse(struct device_node *np, } static const struct regulator_desc regulators[] = { - MAX597X_SWITCH(SW0, MAX5970_REG_CHXEN, 0, "vss1"), - MAX597X_SWITCH(SW1, MAX5970_REG_CHXEN, 1, "vss2"), + MAX597X_SWITCH(sw0, MAX5970_REG_CHXEN, 0, "vss1"), + MAX597X_SWITCH(sw1, MAX5970_REG_CHXEN, 1, "vss2"), }; static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, -- cgit v1.2.3 From 7789bf05529889a39bcf4cd17a68521de063b88b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:59:07 +0100 Subject: floppy: fix function pointer cast warnings clang-16 complains about a control flow integrity (kcfi) violation casting between incompatible pointers: drivers/block/floppy.c:2001:11: error: cast from 'void (*)(void)' to 'done_f' (aka 'void (*)(int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 2001 | .done = (done_f)empty | ^~~~~~~~~~~~~ Just add another empty function with the correct prototype as a workaround. The warning is for code that was added before the start of the normal git history, but I tracked it down to an early change in the reconstructed linux-history.git.
Fixes: 598a477afe06 ("Import 1.1.41") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213095918.455478-1-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d0e41d52d6a9..2ba0ba135951 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -530,14 +530,13 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); /* this is called after the interrupt of the * main command */ void (*redo)(void); /* this is called to retry the operation */ void (*error)(void); /* this is called to tally an error */ - done_f done; /* this is called to say if the operation has + void (*done)(int); /* this is called to say if the operation has * succeeded/failed */ } *cont; @@ -985,6 +984,10 @@ static void empty(void) { } +static void empty_done(int result) +{ +} + static void (*floppy_work_fn)(void); static void floppy_work_workfn(struct work_struct *work) @@ -1998,14 +2001,14 @@ static const struct cont_t wakeup_cont = { .interrupt = empty, .redo = do_wakeup, .error = empty, - .done = (done_f)empty + .done = empty_done, }; static const struct cont_t intr_cont = { .interrupt = empty, .redo = process_fd_request, .error = empty, - .done = (done_f)empty + .done = empty_done, }; /* schedules handler, waiting for completion. 
May be interrupted, will then -- cgit v1.2.3 From fe0b1e9a73d60f01fdc391925be74e823af7c91d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:03:01 +0100 Subject: drbd: fix function cast warnings in state machine There are four state machines in drbd that use a common infrastructure, with a cast to an incompatible function type in REMEMBER_STATE_CHANGE that clang-16 now warns about: drivers/block/drbd/drbd_state.c:1632:3: error: cast from 'int (*)(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type)' to 'typeof (last_func)' (aka 'int (*)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1632 | REMEMBER_STATE_CHANGE(notify_resource_state_change, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1633 | resource_state_change, NOTIFY_CHANGE); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/block/drbd/drbd_state.c:1619:17: note: expanded from macro 'REMEMBER_STATE_CHANGE' 1619 | last_func = (typeof(last_func))func; \ | ^~~~~~~~~~~~~~~~~~~~~~~ drivers/block/drbd/drbd_state.c:1641:4: error: cast from 'int (*)(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type)' to 'typeof (last_func)' (aka 'int (*)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1641 | REMEMBER_STATE_CHANGE(notify_connection_state_change, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1642 | connection_state_change, NOTIFY_CHANGE); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change these all to actually expect a void pointer to be passed, which matches the caller. 
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100354.457128-1-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_state.c | 24 ++++++++++++++---------- drivers/block/drbd/drbd_state_change.h | 8 ++++---- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 287a8d1d3f70..e858e7e0383f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_resource_state_change *resource_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_resource_state_change *resource_state_change = state_change; struct drbd_resource *resource = resource_state_change->resource; struct resource_info resource_info = { .res_role = resource_state_change->role[NEW], @@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb, int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_connection_state_change *connection_state_change, + void *state_change, enum drbd_notification_type type) { - struct drbd_connection *connection = connection_state_change->connection; + struct drbd_connection_state_change *p = state_change; + struct drbd_connection *connection = p->connection; struct connection_info connection_info = { - .conn_connection_state = connection_state_change->cstate[NEW], - .conn_role = connection_state_change->peer_role[NEW], + .conn_connection_state = p->cstate[NEW], + .conn_role = p->peer_role[NEW], }; return notify_connection_state(skb, seq, connection, &connection_info, type); @@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb, int notify_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_device_state_change *device_state_change, 
+ void *state_change, enum drbd_notification_type type) { + struct drbd_device_state_change *device_state_change = state_change; struct drbd_device *device = device_state_change->device; struct device_info device_info = { .dev_disk_state = device_state_change->disk_state[NEW], @@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb, int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_peer_device_state_change *p, + void *state_change, enum drbd_notification_type type) { + struct drbd_peer_device_state_change *p = state_change; struct drbd_peer_device *peer_device = p->peer_device; struct peer_device_info peer_device_info = { .peer_repl_state = p->repl_state[NEW], @@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - int (*last_func)(struct sk_buff *, unsigned int, void *, - enum drbd_notification_type) = NULL; + int (*last_func)(struct sk_buff *, unsigned int, + void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; #define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW]) @@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) }) #define REMEMBER_STATE_CHANGE(func, arg, type) \ ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \ - last_func = (typeof(last_func))func; \ + last_func = func; \ last_arg = arg; \ }) diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index 9d78d8e3912e..a56a57d67686 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *); extern int notify_resource_state_change(struct sk_buff *, unsigned int, - struct drbd_resource_state_change *, + void *, 
enum drbd_notification_type type); extern int notify_connection_state_change(struct sk_buff *, unsigned int, - struct drbd_connection_state_change *, + void *, enum drbd_notification_type type); extern int notify_device_state_change(struct sk_buff *, unsigned int, - struct drbd_device_state_change *, + void *, enum drbd_notification_type type); extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, - struct drbd_peer_device_state_change *, + void *, enum drbd_notification_type type); #endif /* DRBD_STATE_CHANGE_H */ -- cgit v1.2.3 From 9ac4dd8c47d533eb420af6a679e66ec74771125c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:19 +0100 Subject: block: pass a queue_limits argument to blk_mq_init_queue Pass a queue_limits to blk_mq_init_queue and apply it if non-NULL. This will allow allocating queues with valid queue limits instead of setting the values one at a time later. Also rename the function to blk_mq_alloc_queue as that is a much better name for a function that allocates a queue and always pass the queuedata argument instead of having a separate version for the extra argument. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: John Garry Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-10-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 21 ++++++++------------- block/bsg-lib.c | 2 +- drivers/nvme/host/apple.c | 2 +- drivers/nvme/host/core.c | 6 +++--- drivers/scsi/scsi_scan.c | 2 +- drivers/ufs/core/ufshcd.c | 2 +- include/linux/blk-mq.h | 3 ++- 7 files changed, 17 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/block/blk-mq.c b/block/blk-mq.c index 9dd8055cc524..f6499bbd89be 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4083,14 +4083,14 @@ void blk_mq_release(struct request_queue *q) blk_mq_sysfs_deinit(q); } -static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, - void *queuedata) +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata) { - struct queue_limits lim = { }; + struct queue_limits default_lim = { }; struct request_queue *q; int ret; - q = blk_alloc_queue(&lim, set->numa_node); + q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node); if (IS_ERR(q)) return q; q->queuedata = queuedata; @@ -4101,20 +4101,15 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, } return q; } - -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) -{ - return blk_mq_init_queue_data(set, NULL); -} -EXPORT_SYMBOL(blk_mq_init_queue); +EXPORT_SYMBOL(blk_mq_alloc_queue); /** * blk_mq_destroy_queue - shutdown a request queue * @q: request queue to shutdown * - * This shuts down a request queue allocated by blk_mq_init_queue(). All future + * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future * requests will be failed with -ENODEV. The caller is responsible for dropping - * the reference from blk_mq_init_queue() by calling blk_put_queue(). + * the reference from blk_mq_alloc_queue() by calling blk_put_queue(). 
* * Context: can sleep */ @@ -4141,7 +4136,7 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, struct request_queue *q; struct gendisk *disk; - q = blk_mq_init_queue_data(set, queuedata); + q = blk_mq_alloc_queue(set, NULL, queuedata); if (IS_ERR(q)) return ERR_CAST(q); diff --git a/block/bsg-lib.c b/block/bsg-lib.c index b3acdbdb6e7e..bcc7dee6abce 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -383,7 +383,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, if (blk_mq_alloc_tag_set(set)) goto out_tag_set; - q = blk_mq_init_queue(set); + q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(q)) { ret = PTR_ERR(q); goto out_queue; diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index c727cd1f264b..a480cdeac288 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1516,7 +1516,7 @@ static int apple_nvme_probe(struct platform_device *pdev) goto put_dev; } - anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset); + anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL); if (IS_ERR(anv->ctrl.admin_q)) { ret = -ENOMEM; goto put_dev; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6e7f9b13fba2..5bcdf3654598 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4372,14 +4372,14 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; - ctrl->admin_q = blk_mq_init_queue(set); + ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); goto out_free_tagset; } if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->fabrics_q = blk_mq_init_queue(set); + ctrl->fabrics_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->fabrics_q)) { ret = PTR_ERR(ctrl->fabrics_q); goto out_cleanup_admin_q; @@ -4443,7 +4443,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, return ret; if 
(ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_init_queue(set); + ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->connect_q)) { ret = PTR_ERR(ctrl->connect_q); goto out_free_tag_set; diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 44680f65ea14..9969f4e2f1c3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -332,7 +332,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, sdev->sg_reserved_size = INT_MAX; - q = blk_mq_init_queue(&sdev->host->tag_set); + q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL); if (IS_ERR(q)) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 029d017fc1b6..c502a86db16b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10592,7 +10592,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) err = blk_mq_alloc_tag_set(&hba->tmf_tag_set); if (err < 0) goto out_remove_scsi_host; - hba->tmf_queue = blk_mq_init_queue(&hba->tmf_tag_set); + hba->tmf_queue = blk_mq_alloc_queue(&hba->tmf_tag_set, NULL, NULL); if (IS_ERR(hba->tmf_queue)) { err = PTR_ERR(hba->tmf_queue); goto free_tmf_tag_set; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7a8150a5f051..7d42c359e2ab 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -692,7 +692,8 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); 
-- cgit v1.2.3 From 27e32cd23fed1ab88098897897dcb9ec2bdba4de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:20 +0100 Subject: block: pass a queue_limits argument to blk_mq_alloc_disk Pass a queue_limits to blk_mq_alloc_disk and apply it if non-NULL. This will allow allocating queues with valid queue limits instead of setting the values one at a time later. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: John Garry Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-11-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 2 +- block/blk-mq.c | 5 +++-- drivers/block/amiflop.c | 2 +- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/ataflop.c | 2 +- drivers/block/floppy.c | 2 +- drivers/block/loop.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/nbd.c | 2 +- drivers/block/null_blk/main.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/rbd.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/block/sunvdc.c | 2 +- drivers/block/swim.c | 2 +- drivers/block/swim3.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/block/z2ram.c | 2 +- drivers/cdrom/gdrom.c | 2 +- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/core/queue.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/mtd/ubi/block.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/s390/block/scm_blk.c | 2 +- include/linux/blk-mq.h | 7 ++++--- 30 files changed, 35 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 92ee2697ff39..25f1b18ce7d4 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -906,7 +906,7 @@ static int ubd_add(int n, char 
**error_out) if (err) goto out; - disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev); + disk = blk_mq_alloc_disk(&ubd_dev->tag_set, NULL, ubd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; diff --git a/block/blk-mq.c b/block/blk-mq.c index f6499bbd89be..6abb4ce46baa 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4130,13 +4130,14 @@ void blk_mq_destroy_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_destroy_queue); -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass) { struct request_queue *q; struct gendisk *disk; - q = blk_mq_alloc_queue(set, NULL, queuedata); + q = blk_mq_alloc_queue(set, lim, queuedata); if (IS_ERR(q)) return ERR_CAST(q); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 2b98114a9fe0..a25414228e47 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1779,7 +1779,7 @@ static int fd_alloc_disk(int drive, int system) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index b1b47d88f5db..2ff6e2da8cc4 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -371,7 +371,7 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - gd = blk_mq_alloc_disk(set, d); + gd = blk_mq_alloc_disk(set, NULL, d); if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 50949207798d..cacc4ba942a8 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1994,7 +1994,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - disk = 
blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2ba0ba135951..582cf50c6bf6 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4518,7 +4518,7 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - disk = blk_mq_alloc_disk(&tag_sets[drive], NULL); + disk = blk_mq_alloc_disk(&tag_sets[drive], NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8145499da38..3f855cc79c29 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2025,7 +2025,7 @@ static int loop_add(int i) if (err) goto out_free_idr; - disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, NULL, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b200950e8fb5..ac08dea73552 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3431,7 +3431,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - dd->disk = blk_mq_alloc_disk(&dd->tags, dd); + dd->disk = blk_mq_alloc_disk(&dd->tags, NULL, dd); if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 33a8f37bb6a1..30ae3cc12e77 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1823,7 +1823,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) if (err < 0) goto out_free_tags; - disk = blk_mq_alloc_disk(&nbd->tag_set, NULL); + disk = blk_mq_alloc_disk(&nbd->tag_set, NULL, NULL); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_idr; diff --git a/drivers/block/null_blk/main.c 
b/drivers/block/null_blk/main.c index 4281371c81fe..eeb895ec6f34 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2147,7 +2147,7 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_queues; nullb->tag_set->timeout = 5 * HZ; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); goto out_cleanup_tags; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 36d7b36c60c7..dfd3860df4f8 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -431,7 +431,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) if (error) goto fail_teardown; - gendisk = blk_mq_alloc_disk(&priv->tag_set, dev); + gendisk = blk_mq_alloc_disk(&priv->tag_set, NULL, dev); if (IS_ERR(gendisk)) { dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", __func__, __LINE__); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 00ca8a1d8c46..6b4f1898a722 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4966,7 +4966,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (err) return err; - disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev); + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, NULL, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 4044c369d22a..d51be4f2df61 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1408,7 +1408,7 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, dev->size = le64_to_cpu(rsp->nsectors) * le16_to_cpu(rsp->logical_block_size); - dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev); + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, NULL, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); dev->queue = dev->gd->queue; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 
7bf4b48e2282..a1f74dd1eae5 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -824,7 +824,7 @@ static int probe_disk(struct vdc_port *port) if (err) return err; - g = blk_mq_alloc_disk(&port->tag_set, port); + g = blk_mq_alloc_disk(&port->tag_set, NULL, port); if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); diff --git a/drivers/block/swim.c b/drivers/block/swim.c index f85b6af414b4..16bdf62067d8 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -820,7 +820,7 @@ static int swim_floppy_init(struct swim_priv *swd) goto exit_put_disks; swd->unit[drive].disk = - blk_mq_alloc_disk(&swd->unit[drive].tag_set, + blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL, &swd->unit[drive]); if (IS_ERR(swd->unit[drive].disk)) { blk_mq_free_tag_set(&swd->unit[drive].tag_set); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c2bc85826358..a04756ac778e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1210,7 +1210,7 @@ static int swim3_attach(struct macio_dev *mdev, if (rc) goto out_unregister; - disk = blk_mq_alloc_disk(&fs->tag_set, fs); + disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs); if (IS_ERR(disk)) { rc = PTR_ERR(disk); goto out_free_tag_set; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1dfb2e77898b..c5b655270798 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2222,7 +2222,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, NULL); + disk = blk_mq_alloc_disk(&ub->tag_set, NULL, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5bf98fd6a651..a23fce4eca44 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1330,7 +1330,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vq; - vblk->disk = 
blk_mq_alloc_disk(&vblk->tag_set, vblk); + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); if (IS_ERR(vblk->disk)) { err = PTR_ERR(vblk->disk); goto out_free_tags; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 434fab306777..4cc2884e7484 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1136,7 +1136,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, info); + gd = blk_mq_alloc_disk(&info->tag_set, NULL, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 11493167b0a8..7c5f4e4d9b50 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -318,7 +318,7 @@ static int z2ram_register_disk(int minor) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&tag_set, NULL); + disk = blk_mq_alloc_disk(&tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index d668b174ace9..1d044779f5e4 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -778,7 +778,7 @@ static int probe_gdrom(struct platform_device *devptr) if (err) goto probe_fail_free_cd_info; - gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL); + gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL, NULL); if (IS_ERR(gd.disk)) { err = PTR_ERR(gd.disk); goto probe_fail_free_tag_set; diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 04115cd92433..d3277c901d16 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2093,7 +2093,7 @@ static int msb_init_disk(struct memstick_dev *card) if (rc) goto out_release_id; - msb->disk = blk_mq_alloc_disk(&msb->tag_set, card); + msb->disk = blk_mq_alloc_disk(&msb->tag_set, NULL, card); if (IS_ERR(msb->disk)) { rc = PTR_ERR(msb->disk); goto out_free_tag_set; diff --git 
a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5a69ed33999b..db0e2a42ca3c 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1138,7 +1138,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) if (rc) goto out_release_id; - msb->disk = blk_mq_alloc_disk(&msb->tag_set, card); + msb->disk = blk_mq_alloc_disk(&msb->tag_set, NULL, card); if (IS_ERR(msb->disk)) { rc = PTR_ERR(msb->disk); goto out_free_tag_set; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index a0a2412f62a7..67ad186d132a 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -447,7 +447,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) return ERR_PTR(ret); - disk = blk_mq_alloc_disk(&mq->tag_set, mq); + disk = blk_mq_alloc_disk(&mq->tag_set, NULL, mq); if (IS_ERR(disk)) { blk_mq_free_tag_set(&mq->tag_set); return disk; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index f0526dcc2162..b8878a2457af 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -333,7 +333,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) goto out_kfree_tag_set; /* Create gendisk */ - gd = blk_mq_alloc_disk(new->tag_set, new); + gd = blk_mq_alloc_disk(new->tag_set, NULL, new); if (IS_ERR(gd)) { ret = PTR_ERR(gd); goto out_free_tag_set; diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 654bd7372cd8..9be87c231a2e 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -393,7 +393,7 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Initialize the gendisk of this ubiblock device */ - gd = blk_mq_alloc_disk(&dev->tag_set, dev); + gd = blk_mq_alloc_disk(&dev->tag_set, NULL, dev); if (IS_ERR(gd)) { ret = PTR_ERR(gd); goto out_free_tags; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5bcdf3654598..eed3e22e24d9 100644 --- a/drivers/nvme/host/core.c +++ 
b/drivers/nvme/host/core.c @@ -3694,7 +3694,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (!ns) return; - disk = blk_mq_alloc_disk(ctrl->tagset, ns); + disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns); if (IS_ERR(disk)) goto out_free_ns; disk->fops = &nvme_bdev_ops; diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 30e8ee583e98..0465b706745f 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -53,7 +53,7 @@ int dasd_gendisk_alloc(struct dasd_block *block) if (rc) return rc; - gdp = blk_mq_alloc_disk(&block->tag_set, block); + gdp = blk_mq_alloc_disk(&block->tag_set, NULL, block); if (IS_ERR(gdp)) { blk_mq_free_tag_set(&block->tag_set); return PTR_ERR(gdp); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index ade95e91b3c8..d05b2e2799a4 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -462,7 +462,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) if (ret) goto out; - bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev); + bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, NULL, scmdev); if (IS_ERR(bdev->gendisk)) { ret = PTR_ERR(bdev->gendisk); goto out_tag; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7d42c359e2ab..390d35fa0032 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -682,13 +682,14 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk 
*blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -- cgit v1.2.3 From 718628adfcfdc80466eb42cd9c615d1d5514f74c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:21 +0100 Subject: virtio_blk: split virtblk_probe Split out a virtblk_read_limits helper that just reads the various queue limits to separate it from the higher level probing logic. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Stefan Hajnoczi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 193 ++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 92 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a23fce4eca44..dd46ccd9f84c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1248,31 +1248,17 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); -static int virtblk_probe(struct virtio_device *vdev) +static int virtblk_read_limits(struct virtio_blk *vblk) { - struct virtio_blk *vblk; - struct request_queue *q; - int err, index; - + struct request_queue *q = vblk->disk->queue; + struct virtio_device *vdev = vblk->vdev; u32 v, blk_size, max_size, sg_elems, opt_io_size; u32 max_discard_segs = 0; u32 discard_granularity = 0; u16 min_io_size; u8 physical_block_exp, alignment_offset; - unsigned int queue_depth; size_t max_dma_size; - - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - - err = ida_alloc_range(&vd_index_ida, 0, - minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); - if (err < 0) - goto out; - index = err; + 
int err; /* We need to know how many segments before we allocate. */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, @@ -1286,73 +1272,6 @@ static int virtblk_probe(struct virtio_device *vdev) /* Prevent integer overflows and honor max vq size */ sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); - vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); - if (!vblk) { - err = -ENOMEM; - goto out_free_index; - } - - mutex_init(&vblk->vdev_mutex); - - vblk->vdev = vdev; - - INIT_WORK(&vblk->config_work, virtblk_config_changed_work); - - err = init_vq(vblk); - if (err) - goto out_free_vblk; - - /* Default queue sizing is to fill the ring. */ - if (!virtblk_queue_depth) { - queue_depth = vblk->vqs[0].vq->num_free; - /* ... but without indirect descs, we use 2 descs per req */ - if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) - queue_depth /= 2; - } else { - queue_depth = virtblk_queue_depth; - } - - memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); - vblk->tag_set.ops = &virtio_mq_ops; - vblk->tag_set.queue_depth = queue_depth; - vblk->tag_set.numa_node = NUMA_NO_NODE; - vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - vblk->tag_set.cmd_size = - sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; - vblk->tag_set.driver_data = vblk; - vblk->tag_set.nr_hw_queues = vblk->num_vqs; - vblk->tag_set.nr_maps = 1; - if (vblk->io_queues[HCTX_TYPE_POLL]) - vblk->tag_set.nr_maps = 3; - - err = blk_mq_alloc_tag_set(&vblk->tag_set); - if (err) - goto out_free_vq; - - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); - if (IS_ERR(vblk->disk)) { - err = PTR_ERR(vblk->disk); - goto out_free_tags; - } - q = vblk->disk->queue; - - virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); - - vblk->disk->major = major; - vblk->disk->first_minor = index_to_minor(index); - vblk->disk->minors = 1 << PART_BITS; - vblk->disk->private_data = vblk; - vblk->disk->fops = &virtblk_fops; - vblk->index = index; - - 
/* configure queue flush support */ - virtblk_update_cache_mode(vdev); - - /* If disk is read-only in the host, the guest should obey */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) - set_disk_ro(vblk->disk, 1); - /* We can handle whatever the host told us to handle. */ blk_queue_max_segments(q, sg_elems); @@ -1381,7 +1300,7 @@ static int virtblk_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "virtio_blk: invalid block size: 0x%x\n", blk_size); - goto out_cleanup_disk; + return err; } blk_queue_logical_block_size(q, blk_size); @@ -1455,8 +1374,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: secure_erase_sector_alignment can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } discard_granularity = min_not_zero(discard_granularity, v); @@ -1470,8 +1388,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_sectors can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } blk_queue_max_secure_erase_sectors(q, v); @@ -1485,8 +1402,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_seg can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } max_discard_segs = min_not_zero(max_discard_segs, v); @@ -1511,6 +1427,99 @@ static int virtblk_probe(struct virtio_device *vdev) q->limits.discard_granularity = blk_size; } + return 0; +} + +static int virtblk_probe(struct virtio_device *vdev) +{ + struct virtio_blk *vblk; + struct request_queue *q; + int err, index; + unsigned int queue_depth; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + err = ida_alloc_range(&vd_index_ida, 0, + minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); + if (err < 0) + goto out; + index = err; + + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); + if (!vblk) { + 
err = -ENOMEM; + goto out_free_index; + } + + mutex_init(&vblk->vdev_mutex); + + vblk->vdev = vdev; + + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); + + err = init_vq(vblk); + if (err) + goto out_free_vblk; + + /* Default queue sizing is to fill the ring. */ + if (!virtblk_queue_depth) { + queue_depth = vblk->vqs[0].vq->num_free; + /* ... but without indirect descs, we use 2 descs per req */ + if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) + queue_depth /= 2; + } else { + queue_depth = virtblk_queue_depth; + } + + memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); + vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.queue_depth = queue_depth; + vblk->tag_set.numa_node = NUMA_NO_NODE; + vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + vblk->tag_set.cmd_size = + sizeof(struct virtblk_req) + + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; + vblk->tag_set.driver_data = vblk; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; + vblk->tag_set.nr_maps = 1; + if (vblk->io_queues[HCTX_TYPE_POLL]) + vblk->tag_set.nr_maps = 3; + + err = blk_mq_alloc_tag_set(&vblk->tag_set); + if (err) + goto out_free_vq; + + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); + if (IS_ERR(vblk->disk)) { + err = PTR_ERR(vblk->disk); + goto out_free_tags; + } + q = vblk->disk->queue; + + virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); + + vblk->disk->major = major; + vblk->disk->first_minor = index_to_minor(index); + vblk->disk->minors = 1 << PART_BITS; + vblk->disk->private_data = vblk; + vblk->disk->fops = &virtblk_fops; + vblk->index = index; + + /* configure queue flush support */ + virtblk_update_cache_mode(vdev); + + /* If disk is read-only in the host, the guest should obey */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) + set_disk_ro(vblk->disk, 1); + + err = virtblk_read_limits(vblk); + if (err) + goto out_cleanup_disk; + virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); -- cgit v1.2.3 From 
8b837256560c783415b42748959900befcde2d00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:22 +0100 Subject: virtio_blk: pass queue_limits to blk_mq_alloc_disk Call virtblk_read_limits and most of virtblk_probe_zoned_device before allocating the gendisk and thus request_queue and make them read into a queue_limits structure instead. Pass this initialized queue_limits to blk_mq_alloc_disk to set the queue up with the right parameters from the start and only leave a few final touches for zoned devices to be done just before adding the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Stefan Hajnoczi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-13-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 130 ++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 66 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd46ccd9f84c..d8b55874cd59 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -720,16 +720,15 @@ fail_report: return ret; } -static int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, - struct request_queue *q) +static int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { + struct virtio_device *vdev = vblk->vdev; u32 v, wg; dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - disk_set_zoned(vblk->disk); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + lim->zoned = true; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -747,8 +746,8 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_warn(&vdev->dev, "zero write granularity reported\n"); return -ENODEV; } - blk_queue_physical_block_size(q, wg); - 
blk_queue_io_min(q, wg); + lim->physical_block_size = wg; + lim->io_min = wg; dev_dbg(&vdev->dev, "write granularity = %u\n", wg); @@ -764,13 +763,13 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, vblk->zone_sectors); return -ENODEV; } - blk_queue_chunk_sectors(q, vblk->zone_sectors); + lim->chunk_sectors = vblk->zone_sectors; dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { dev_warn(&vblk->vdev->dev, "ignoring negotiated F_DISCARD for zoned device\n"); - blk_queue_max_discard_sectors(q, 0); + lim->max_hw_discard_sectors = 0; } virtio_cread(vdev, struct virtio_blk_config, @@ -785,25 +784,21 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, wg, v); return -ENODEV; } - blk_queue_max_zone_append_sectors(q, v); + lim->max_zone_append_sectors = v; dev_dbg(&vdev->dev, "max append sectors = %u\n", v); - return blk_revalidate_disk_zones(vblk->disk, NULL); + return 0; } - #else - /* - * Zoned block device support is not configured in this kernel. - * Host-managed zoned devices can't be supported, but others are - * good to go as regular block devices. + * Zoned block device support is not configured in this kernel, host-managed + * zoned devices can't be supported. 
*/ #define virtblk_report_zones NULL - -static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, struct request_queue *q) +static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - dev_err(&vdev->dev, + dev_err(&vblk->vdev->dev, "virtio_blk: zoned devices are not supported"); return -EOPNOTSUPP; } @@ -1248,9 +1243,9 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); -static int virtblk_read_limits(struct virtio_blk *vblk) +static int virtblk_read_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - struct request_queue *q = vblk->disk->queue; struct virtio_device *vdev = vblk->vdev; u32 v, blk_size, max_size, sg_elems, opt_io_size; u32 max_discard_segs = 0; @@ -1273,10 +1268,10 @@ static int virtblk_read_limits(struct virtio_blk *vblk) sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); /* We can handle whatever the host told us to handle. */ - blk_queue_max_segments(q, sg_elems); + lim->max_segments = sg_elems; /* No real sector limit. */ - blk_queue_max_hw_sectors(q, UINT_MAX); + lim->max_hw_sectors = UINT_MAX; max_dma_size = virtio_max_dma_size(vdev); max_size = max_dma_size > U32_MAX ? 
U32_MAX : max_dma_size; @@ -1288,7 +1283,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (!err) max_size = min(max_size, v); - blk_queue_max_segment_size(q, max_size); + lim->max_segment_size = max_size; /* Host can optionally specify the block size of the device */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, @@ -1303,35 +1298,34 @@ static int virtblk_read_limits(struct virtio_blk *vblk) return err; } - blk_queue_logical_block_size(q, blk_size); + lim->logical_block_size = blk_size; } else - blk_size = queue_logical_block_size(q); + blk_size = lim->logical_block_size; /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, &physical_block_exp); if (!err && physical_block_exp) - blk_queue_physical_block_size(q, - blk_size * (1 << physical_block_exp)); + lim->physical_block_size = blk_size * (1 << physical_block_exp); err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, alignment_offset, &alignment_offset); if (!err && alignment_offset) - blk_queue_alignment_offset(q, blk_size * alignment_offset); + lim->alignment_offset = blk_size * alignment_offset; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, min_io_size, &min_io_size); if (!err && min_io_size) - blk_queue_io_min(q, blk_size * min_io_size); + lim->io_min = blk_size * min_io_size; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, opt_io_size, &opt_io_size); if (!err && opt_io_size) - blk_queue_io_opt(q, blk_size * opt_io_size); + lim->io_opt = blk_size * opt_io_size; if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, @@ -1339,7 +1333,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); - blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); + lim->max_hw_discard_sectors = v ? 
v : UINT_MAX; virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &max_discard_segs); @@ -1348,7 +1342,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v); - blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); + lim->max_write_zeroes_sectors = v ? v : UINT_MAX; } /* The discard and secure erase limits are combined since the Linux @@ -1391,7 +1385,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) return -EINVAL; } - blk_queue_max_secure_erase_sectors(q, v); + lim->max_secure_erase_sectors = v; virtio_cread(vdev, struct virtio_blk_config, max_secure_erase_seg, &v); @@ -1418,13 +1412,34 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (!max_discard_segs) max_discard_segs = sg_elems; - blk_queue_max_discard_segments(q, - min(max_discard_segs, MAX_DISCARD_SEGMENTS)); + lim->max_discard_segments = + min(max_discard_segs, MAX_DISCARD_SEGMENTS); if (discard_granularity) - q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT; + lim->discard_granularity = + discard_granularity << SECTOR_SHIFT; else - q->limits.discard_granularity = blk_size; + lim->discard_granularity = blk_size; + } + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { + u8 model; + + virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); + switch (model) { + case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + /* treat host-aware devices as non-zoned */ + return 0; + case VIRTIO_BLK_Z_HM: + err = virtblk_read_zoned_limits(vblk, lim); + if (err) + return err; + break; + default: + dev_err(&vdev->dev, "unsupported zone model %d\n", model); + return -EINVAL; + } } return 0; @@ -1433,7 +1448,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - struct request_queue *q; + struct queue_limits lim = { }; int err, index; 
unsigned int queue_depth; @@ -1493,12 +1508,15 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vq; - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); + err = virtblk_read_limits(vblk, &lim); + if (err) + goto out_free_tags; + + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk); if (IS_ERR(vblk->disk)) { err = PTR_ERR(vblk->disk); goto out_free_tags; } - q = vblk->disk->queue; virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); @@ -1516,10 +1534,6 @@ static int virtblk_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) set_disk_ro(vblk->disk, 1); - err = virtblk_read_limits(vblk); - if (err) - goto out_cleanup_disk; - virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); @@ -1527,27 +1541,11 @@ static int virtblk_probe(struct virtio_device *vdev) * All steps that follow use the VQs therefore they need to be * placed after the virtio_device_ready() call above. */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { - u8 model; - - virtio_cread(vdev, struct virtio_blk_config, zoned.model, - &model); - switch (model) { - case VIRTIO_BLK_Z_NONE: - case VIRTIO_BLK_Z_HA: - /* Present the host-aware device as non-zoned */ - break; - case VIRTIO_BLK_Z_HM: - err = virtblk_probe_zoned_device(vdev, vblk, q); - if (err) - goto out_cleanup_disk; - break; - default: - dev_err(&vdev->dev, "unsupported zone model %d\n", - model); - err = -EINVAL; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); + err = blk_revalidate_disk_zones(vblk->disk, NULL); + if (err) goto out_cleanup_disk; - } } err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); -- cgit v1.2.3 From 65bdd16f8c72bb2178f5e4db40305bef6c96b309 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:23 +0100 Subject: loop: cleanup loop_config_discard Initialize the local variables for the discard max sectors 
and granularity to zero as a sensible default, and then merge the calls assigning them to the queue limits. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-14-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f855cc79c29..7abeb5869426 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -755,7 +755,8 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - u32 granularity, max_discard_sectors; + u32 granularity = 0, max_discard_sectors = 0; + struct kstatfs sbuf; /* * If the backing device is a block device, mirror its zeroing @@ -775,29 +776,17 @@ static void loop_config_discard(struct loop_device *lo) * We use punch hole to reclaim the free space used by the * image a.k.a. discard. 
*/ - } else if (!file->f_op->fallocate) { - max_discard_sectors = 0; - granularity = 0; - - } else { - struct kstatfs sbuf; - + } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) { max_discard_sectors = UINT_MAX >> 9; - if (!vfs_statfs(&file->f_path, &sbuf)) - granularity = sbuf.f_bsize; - else - max_discard_sectors = 0; + granularity = sbuf.f_bsize; } - if (max_discard_sectors) { + blk_queue_max_discard_sectors(q, max_discard_sectors); + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + if (max_discard_sectors) q->limits.discard_granularity = granularity; - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - } else { + else q->limits.discard_granularity = 0; - blk_queue_max_discard_sectors(q, 0); - blk_queue_max_write_zeroes_sectors(q, 0); - } } struct loop_worker { -- cgit v1.2.3 From 02aed4a1f2c355e41d82a8e9831031ca9e0eb45d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:24 +0100 Subject: loop: pass queue_limits to blk_mq_alloc_disk Pass the max_hw_sector limit loop sets at initialization time directly to blk_mq_alloc_disk instead of updating it right after the allocation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-15-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7abeb5869426..26c8ea790867 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1971,6 +1971,12 @@ static const struct blk_mq_ops loop_mq_ops = { static int loop_add(int i) { + struct queue_limits lim = { + /* + * Random number picked from the historic block max_sectors cap. 
+ */ + .max_hw_sectors = 2560u, + }; struct loop_device *lo; struct gendisk *disk; int err; @@ -2014,16 +2020,13 @@ static int loop_add(int i) if (err) goto out_free_idr; - disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, NULL, lo); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; } lo->lo_queue = lo->lo_disk->queue; - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(lo->lo_queue, 2560u); - /* * By default, we do buffer IO, so it doesn't make sense to enable * merge because the I/O submitted to backing file is handled page by -- cgit v1.2.3 From 473516b361936cbc27d7728df649a5b3094b6170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:25 +0100 Subject: loop: use the atomic queue limits update API Pass the default limits to blk_mq_alloc_disk and then use the queue_limits_{start,commit}_update API to change the limits in an atomic way on existing loop gendisks. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-16-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 26c8ea790867..28a95fd366fe 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -750,11 +750,11 @@ static void loop_sysfs_exit(struct loop_device *lo) &loop_attribute_group); } -static void loop_config_discard(struct loop_device *lo) +static void loop_config_discard(struct loop_device *lo, + struct queue_limits *lim) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; - struct request_queue *q = lo->lo_queue; u32 granularity = 0, max_discard_sectors = 0; struct kstatfs sbuf; @@ -781,12 +781,12 @@ static void loop_config_discard(struct loop_device *lo) granularity = sbuf.f_bsize; } - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + lim->max_hw_discard_sectors = max_discard_sectors; + lim->max_write_zeroes_sectors = max_discard_sectors; if (max_discard_sectors) - q->limits.discard_granularity = granularity; + lim->discard_granularity = granularity; else - q->limits.discard_granularity = 0; + lim->discard_granularity = 0; } struct loop_worker { @@ -975,6 +975,20 @@ loop_set_status_from_info(struct loop_device *lo, return 0; } +static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize, + bool update_discard_settings) +{ + struct queue_limits lim; + + lim = queue_limits_start_update(lo->lo_queue); + lim.logical_block_size = bsize; + lim.physical_block_size = bsize; + lim.io_min = bsize; + if (update_discard_settings) + loop_config_discard(lo, &lim); + return queue_limits_commit_update(lo->lo_queue, &lim); +} + static int loop_configure(struct loop_device *lo, blk_mode_t mode, struct block_device 
*bdev, const struct loop_config *config) @@ -1072,11 +1086,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, else bsize = 512; - blk_queue_logical_block_size(lo->lo_queue, bsize); - blk_queue_physical_block_size(lo->lo_queue, bsize); - blk_queue_io_min(lo->lo_queue, bsize); + error = loop_reconfigure_limits(lo, bsize, true); + if (WARN_ON_ONCE(error)) + goto out_unlock; - loop_config_discard(lo); loop_update_rotational(lo); loop_update_dio(lo); loop_sysfs_init(lo); @@ -1143,9 +1156,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) lo->lo_offset = 0; lo->lo_sizelimit = 0; memset(lo->lo_file_name, 0, LO_NAME_SIZE); - blk_queue_logical_block_size(lo->lo_queue, 512); - blk_queue_physical_block_size(lo->lo_queue, 512); - blk_queue_io_min(lo->lo_queue, 512); + loop_reconfigure_limits(lo, 512, false); invalidate_disk(lo->lo_disk); loop_sysfs_exit(lo); /* let user-space know about this change */ @@ -1477,9 +1488,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) invalidate_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - blk_queue_logical_block_size(lo->lo_queue, arg); - blk_queue_physical_block_size(lo->lo_queue, arg); - blk_queue_io_min(lo->lo_queue, arg); + err = loop_reconfigure_limits(lo, arg, false); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); -- cgit v1.2.3 From ff3206d2186d84e4f77e1378ba1d225633f17b9b Mon Sep 17 00:00:00 2001 From: Ivan Semenov Date: Tue, 6 Feb 2024 19:28:45 +0200 Subject: mmc: core: Fix eMMC initialization with 1-bit bus connection Initializing an eMMC that's connected via a 1-bit bus is currently failing, if the HW (DT) informs that 4-bit bus is supported. In fact this is a regression, as we were earlier capable of falling back to 1-bit mode, when switching to 4/8-bit bus failed. Therefore, let's restore the behaviour.
Log for Samsung eMMC 5.1 chip connected via 1bit bus (only D0 pin) Before patch: [134509.044225] mmc0: switch to bus width 4 failed [134509.044509] mmc0: new high speed MMC card at address 0001 [134509.054594] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134509.281602] mmc0: switch to bus width 4 failed [134509.282638] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.282657] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284598] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.284602] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284609] ldm_validate_partition_table(): Disk read failed. [134509.286495] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.286500] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.288303] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.288308] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289540] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.289544] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289553] mmcblk0: unable to read partition table [134509.289728] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134509.290283] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB [134509.294577] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 [134509.295835] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.295841] Buffer I/O error on dev mmcblk0, logical block 0, async page read After patch: [134551.089613] mmc0: switch to bus width 4 failed [134551.090377] mmc0: new high speed MMC card at address 0001 [134551.102271] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134551.113365] mmcblk0: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 p13 p14 p15 p16 p17 p18 p19 p20 p21 [134551.114262] 
mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134551.114925] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB Fixes: 577fb13199b1 ("mmc: rework selection of bus speed mode") Cc: stable@vger.kernel.org Signed-off-by: Ivan Semenov Link: https://lore.kernel.org/r/20240206172845.34316-1-ivan@semenov.dev Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f410bee50132..58ed7193a3ca 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1015,10 +1015,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; -- cgit v1.2.3 From 83bf24051a60d867e7633e07343913593c242f5d Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 8 Feb 2024 15:16:53 +0530 Subject: EDAC/versal: Make the bit position of injected errors configurable Currently, the bit positions to inject correctable and uncorrectable errors are hardcoded. To make that configurable add separate sysfs entries to set the bit positions for injecting CE and UE errors. Allow for single bit error for CE and two bits errors for UE injection. [ bp: Massage. 
] Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240208094653.11704-1-shubhrajyoti.datta@amd.com --- drivers/edac/versal_edac.c | 193 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 161 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 62caf454b567..3016870689f1 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -42,8 +42,11 @@ #define ECCW0_FLIP_CTRL 0x109C #define ECCW0_FLIP0_OFFSET 0x10A0 +#define ECCW0_FLIP0_BITS 31 +#define ECCW0_FLIP1_OFFSET 0x10A4 #define ECCW1_FLIP_CTRL 0x10AC #define ECCW1_FLIP0_OFFSET 0x10B0 +#define ECCW1_FLIP1_OFFSET 0x10B4 #define ECCR0_CERR_STAT_OFFSET 0x10BC #define ECCR0_CE_ADDR_LO_OFFSET 0x10C0 #define ECCR0_CE_ADDR_HI_OFFSET 0x10C4 @@ -116,9 +119,6 @@ #define XDDR_BUS_WIDTH_32 1 #define XDDR_BUS_WIDTH_16 2 -#define ECC_CEPOISON_MASK 0x1 -#define ECC_UEPOISON_MASK 0x3 - #define XDDR_MAX_ROW_CNT 18 #define XDDR_MAX_COL_CNT 10 #define XDDR_MAX_RANK_CNT 2 @@ -133,6 +133,7 @@ * https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PCSR_LOCK-XRAM_SLCR-Register */ #define PCSR_UNLOCK_VAL 0xF9E8D7C6 +#define PCSR_LOCK_VAL 1 #define XDDR_ERR_TYPE_CE 0 #define XDDR_ERR_TYPE_UE 1 @@ -142,6 +143,7 @@ #define XILINX_DRAM_SIZE_12G 3 #define XILINX_DRAM_SIZE_16G 4 #define XILINX_DRAM_SIZE_32G 5 +#define NUM_UE_BITPOS 2 /** * struct ecc_error_info - ECC error log information. 
@@ -479,7 +481,7 @@ static void err_callback(const u32 *payload, void *data) writel(regval, priv->ddrmc_baseaddr + XDDR_ISR_OFFSET); /* Lock the PCSR registers */ - writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); edac_dbg(3, "Total error count CE %d UE %d\n", priv->ce_cnt, priv->ue_cnt); } @@ -650,7 +652,7 @@ static void enable_intr(struct edac_priv *priv) writel(XDDR_IRQ_UE_MASK, priv->ddrmc_baseaddr + XDDR_IRQ1_EN_OFFSET); /* Lock the PCSR registers */ - writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); } static void disable_intr(struct edac_priv *priv) @@ -663,7 +665,7 @@ static void disable_intr(struct edac_priv *priv) priv->ddrmc_baseaddr + XDDR_IRQ_DIS_OFFSET); /* Lock the PCSR registers */ - writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); } #define to_mci(k) container_of(k, struct mem_ctl_info, dev) @@ -734,38 +736,63 @@ static void poison_setup(struct edac_priv *priv) writel(regval, priv->ddrmc_noc_baseaddr + XDDR_NOC_REG_ADEC15_OFFSET); } -static ssize_t xddr_inject_data_poison_store(struct mem_ctl_info *mci, - const char __user *data) +static void xddr_inject_data_ce_store(struct mem_ctl_info *mci, u8 ce_bitpos) { + u32 ecc0_flip0, ecc1_flip0, ecc0_flip1, ecc1_flip1; struct edac_priv *priv = mci->pvt_info; - writel(0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET); - writel(0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET); - - if (strncmp(data, "CE", 2) == 0) { - writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr + - ECCW0_FLIP0_OFFSET); - writel(ECC_CEPOISON_MASK, priv->ddrmc_baseaddr + - ECCW1_FLIP0_OFFSET); + if (ce_bitpos < ECCW0_FLIP0_BITS) { + ecc0_flip0 = BIT(ce_bitpos); + ecc1_flip0 = BIT(ce_bitpos); + ecc0_flip1 = 0; + ecc1_flip1 = 0; } else { - writel(ECC_UEPOISON_MASK, priv->ddrmc_baseaddr + - ECCW0_FLIP0_OFFSET); - writel(ECC_UEPOISON_MASK, 
priv->ddrmc_baseaddr + - ECCW1_FLIP0_OFFSET); + ce_bitpos = ce_bitpos - ECCW0_FLIP0_BITS; + ecc0_flip1 = BIT(ce_bitpos); + ecc1_flip1 = BIT(ce_bitpos); + ecc0_flip0 = 0; + ecc1_flip0 = 0; } - /* Lock the PCSR registers */ - writel(1, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); - - return 0; + writel(ecc0_flip0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET); + writel(ecc1_flip0, priv->ddrmc_baseaddr + ECCW1_FLIP0_OFFSET); + writel(ecc0_flip1, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET); + writel(ecc1_flip1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET); } -static ssize_t inject_data_poison_store(struct file *file, const char __user *data, - size_t count, loff_t *ppos) +/* + * To inject a correctable error, the following steps are needed: + * + * - Write the correctable error bit position value: + * echo > /sys/kernel/debug/edac//inject_ce + * + * poison_setup() derives the row, column, bank, group and rank and + * writes to the ADEC registers based on the address given by the user. + * + * The ADEC12 and ADEC13 are mask registers; write 0 to make sure default + * configuration is there and no addresses are masked. + * + * The row, column, bank, group and rank registers are written to the + * match ADEC bit to generate errors at the particular address. ADEC14 + * and ADEC15 have the match bits. + * + * xddr_inject_data_ce_store() updates the ECC FLIP registers with the + * bits to be corrupted based on the bit position given by the user. + * + * Upon doing a read to the address the errors are injected. 
+ */ +static ssize_t inject_data_ce_store(struct file *file, const char __user *data, + size_t count, loff_t *ppos) { struct device *dev = file->private_data; struct mem_ctl_info *mci = to_mci(dev); struct edac_priv *priv = mci->pvt_info; + u8 ce_bitpos; + int ret; + + ret = kstrtou8_from_user(data, count, 0, &ce_bitpos); + if (ret) + return ret; /* Unlock the PCSR registers */ writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); @@ -773,17 +800,110 @@ static ssize_t inject_data_poison_store(struct file *file, const char __user *da poison_setup(priv); + xddr_inject_data_ce_store(mci, ce_bitpos); + ret = count; + /* Lock the PCSR registers */ - writel(1, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + + return ret; +} + +static const struct file_operations xddr_inject_ce_fops = { + .open = simple_open, + .write = inject_data_ce_store, + .llseek = generic_file_llseek, +}; + +static void xddr_inject_data_ue_store(struct mem_ctl_info *mci, u32 val0, u32 val1) +{ + struct edac_priv *priv = mci->pvt_info; + + writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP0_OFFSET); + writel(val0, priv->ddrmc_baseaddr + ECCW0_FLIP1_OFFSET); + writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET); + writel(val1, priv->ddrmc_baseaddr + ECCW1_FLIP1_OFFSET); +} + +/* + * To inject an uncorrectable error, the following steps are needed: + * echo > /sys/kernel/debug/edac//inject_ue + * + * poison_setup() derives the row, column, bank, group and rank and + * writes to the ADEC registers based on the address given by the user. + * + * The ADEC12 and ADEC13 are mask registers; write 0 so that none of the + * addresses are masked. The row, column, bank, group and rank registers + * are written to the match ADEC bit to generate errors at the + * particular address. ADEC14 and ADEC15 have the match bits. 
+ * + * xddr_inject_data_ue_store() updates the ECC FLIP registers with the + * bits to be corrupted based on the bit position given by the user. For + * uncorrectable errors + * 2 bit errors are injected. + * + * Upon doing a read to the address the errors are injected. + */ +static ssize_t inject_data_ue_store(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + struct edac_priv *priv = mci->pvt_info; + char buf[6], *pbuf, *token[2]; + u32 val0 = 0, val1 = 0; + u8 len, ue0, ue1; + int i, ret; + + len = min_t(size_t, count, sizeof(buf)); + if (copy_from_user(buf, data, len)) + return -EFAULT; + + buf[len] = '\0'; + pbuf = &buf[0]; + for (i = 0; i < NUM_UE_BITPOS; i++) + token[i] = strsep(&pbuf, ","); + + ret = kstrtou8(token[0], 0, &ue0); + if (ret) + return ret; + + ret = kstrtou8(token[1], 0, &ue1); + if (ret) + return ret; + + if (ue0 < ECCW0_FLIP0_BITS) { + val0 = BIT(ue0); + } else { + ue0 = ue0 - ECCW0_FLIP0_BITS; + val1 = BIT(ue0); + } - xddr_inject_data_poison_store(mci, data); + if (ue1 < ECCW0_FLIP0_BITS) { + val0 |= BIT(ue1); + } else { + ue1 = ue1 - ECCW0_FLIP0_BITS; + val1 |= BIT(ue1); + } + /* Unlock the PCSR registers */ + writel(PCSR_UNLOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_UNLOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + + poison_setup(priv); + + xddr_inject_data_ue_store(mci, val0, val1); + + /* Lock the PCSR registers */ + writel(PCSR_LOCK_VAL, priv->ddrmc_noc_baseaddr + XDDR_PCSR_OFFSET); + writel(PCSR_LOCK_VAL, priv->ddrmc_baseaddr + XDDR_PCSR_OFFSET); return count; } -static const struct file_operations xddr_inject_enable_fops = { +static const struct file_operations xddr_inject_ue_fops = { .open = simple_open, - .write = inject_data_poison_store, + .write = inject_data_ue_store, .llseek = generic_file_llseek, }; @@ -795,8 +915,17 @@ static void create_debugfs_attributes(struct mem_ctl_info *mci) 
if (!priv->debugfs) return; - edac_debugfs_create_file("inject_error", 0200, priv->debugfs, - &mci->dev, &xddr_inject_enable_fops); + if (!edac_debugfs_create_file("inject_ce", 0200, priv->debugfs, + &mci->dev, &xddr_inject_ce_fops)) { + debugfs_remove_recursive(priv->debugfs); + return; + } + + if (!edac_debugfs_create_file("inject_ue", 0200, priv->debugfs, + &mci->dev, &xddr_inject_ue_fops)) { + debugfs_remove_recursive(priv->debugfs); + return; + } debugfs_create_x64("address", 0600, priv->debugfs, &priv->err_inject_addr); mci->debugfs = priv->debugfs; -- cgit v1.2.3 From 6b1ba3f9040be5efc4396d86c9752cdc564730be Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Wed, 7 Feb 2024 15:39:51 +0100 Subject: mmc: mmci: stm32: fix DMA API overlapping mappings warning Turning on CONFIG_DMA_API_DEBUG_SG results in the following warning: DMA-API: mmci-pl18x 48220000.mmc: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 1 PID: 51 at kernel/dma/debug.c:568 add_dma_entry+0x234/0x2f4 Modules linked in: CPU: 1 PID: 51 Comm: kworker/1:2 Not tainted 6.1.28 #1 Hardware name: STMicroelectronics STM32MP257F-EV1 Evaluation Board (DT) Workqueue: events_freezable mmc_rescan Call trace: add_dma_entry+0x234/0x2f4 debug_dma_map_sg+0x198/0x350 __dma_map_sg_attrs+0xa0/0x110 dma_map_sg_attrs+0x10/0x2c sdmmc_idma_prep_data+0x80/0xc0 mmci_prep_data+0x38/0x84 mmci_start_data+0x108/0x2dc mmci_request+0xe4/0x190 __mmc_start_request+0x68/0x140 mmc_start_request+0x94/0xc0 mmc_wait_for_req+0x70/0x100 mmc_send_tuning+0x108/0x1ac sdmmc_execute_tuning+0x14c/0x210 mmc_execute_tuning+0x48/0xec mmc_sd_init_uhs_card.part.0+0x208/0x464 mmc_sd_init_card+0x318/0x89c mmc_attach_sd+0xe4/0x180 mmc_rescan+0x244/0x320 DMA API debug brings to light leaking dma-mappings as dma_map_sg and dma_unmap_sg are not correctly balanced. 
If an error occurs in mmci_cmd_irq function, only mmci_dma_error function is called and as this API is not managed on stm32 variant, dma_unmap_sg is never called in this error path. Signed-off-by: Christophe Kerello Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240207143951.938144-1-christophe.kerello@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 35067e1e6cd8..f5da7f9baa52 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -225,6 +225,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -263,9 +265,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -676,6 +699,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = 
sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, -- cgit v1.2.3 From 3b566b30b41401888ee0e8eb904a1e7a6693794b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 13 Feb 2024 21:35:15 -0600 Subject: RAS/AMD/ATL: Add MI300 row retirement support DRAM row retirement depends on model-specific information that is best done within the AMD Address Translation Library. Export a generic wrapper function for other modules to use. Add any model-specific helpers here. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240214033516.1344948-2-yazen.ghannam@amd.com --- drivers/ras/amd/atl/Kconfig | 1 + drivers/ras/amd/atl/umc.c | 51 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/ras.h | 2 ++ 3 files changed, 54 insertions(+) (limited to 'drivers') diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig index a43513a700f1..df49c23e7f62 100644 --- a/drivers/ras/amd/atl/Kconfig +++ b/drivers/ras/amd/atl/Kconfig @@ -10,6 +10,7 @@ config AMD_ATL tristate "AMD Address Translation Library" depends on AMD_NB && X86_64 && RAS + depends on MEMORY_FAILURE default N help This library includes support for implementation-specific diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 7e310d1dfcfc..08c6dbd44c62 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -239,6 +239,57 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) return addr; } +/* + * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire + * all memory within that DRAM row. This applies to the memory with a DRAM + * bank. + * + * To find the memory addresses, loop through permutations of the DRAM column + * bits and find the System Physical address of each. The column bits are used + * to calculate the intermediate Normalized address, so all permutations should + * be checked. 
+ * + * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. + */ +#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) +static void retire_row_mi300(struct atl_err *a_err) +{ + unsigned long addr; + struct page *p; + u8 col; + + for (col = 0; col < MI300_NUM_COL; col++) { + a_err->addr &= ~MI300_UMC_MCA_COL; + a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col); + + addr = amd_convert_umc_mca_addr_to_sys_addr(a_err); + if (IS_ERR_VALUE(addr)) + continue; + + addr = PHYS_PFN(addr); + + /* + * Skip invalid or already poisoned pages to avoid unnecessary + * error messages from memory_failure(). + */ + p = pfn_to_online_page(addr); + if (!p) + continue; + + if (PageHWPoison(p)) + continue; + + memory_failure(addr, 0); + } +} + +void amd_retire_dram_row(struct atl_err *a_err) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return retire_row_mi300(a_err); +} +EXPORT_SYMBOL_GPL(amd_retire_dram_row); + static unsigned long get_addr(unsigned long addr) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) diff --git a/include/linux/ras.h b/include/linux/ras.h index 09c632832bf1..a64182bc72ad 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -45,8 +45,10 @@ struct atl_err { #if IS_ENABLED(CONFIG_AMD_ATL) void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); #else +static inline void amd_retire_dram_row(struct atl_err *err) { } static inline unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } #endif /* CONFIG_AMD_ATL */ -- cgit v1.2.3 From 15137825100422c4c393c87af5aa5a8fa297b1f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2024 21:47:29 +0530 Subject: irqchip/gic-v3: Make gic_irq_domain_select() robust for zero parameter count Currently the irqdomain select callback is only invoked when the 
parameter count of the fwspec arguments is not zero. That makes sense because then the match is on the firmware node and eventually on the bus_token, which is already handled in the core code. The upcoming support for per device MSI domains requires to do real bus token specific checks in the MSI parent domains with a zero parameter count. Make the gic-v3 select() callback handle that case. Signed-off-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240127161753.114685-2-apatel@ventanamicro.com --- drivers/irqchip/irq-gic-v3.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 98b0329b7154..35b9362d178f 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1702,9 +1702,13 @@ static int gic_irq_domain_select(struct irq_domain *d, irq_hw_number_t hwirq; /* Not for us */ - if (fwspec->fwnode != d->fwnode) + if (fwspec->fwnode != d->fwnode) return 0; + /* Handle pure domain searches */ + if (!fwspec->param_count) + return d->bus_token == bus_token; + /* If this is not DT, then we have a single domain */ if (!is_of_node(fwspec->fwnode)) return 1; -- cgit v1.2.3 From c88f9110bfbca5975a8dee4c9792ba12684c7bca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2024 21:47:33 +0530 Subject: platform-msi: Prepare for real per device domains Provide functions to create and remove per device MSI domains which replace the platform-MSI domains. The new model is that each of the devices which utilize platform-MSI gets now its private MSI domain which is "customized" in size and with a device specific function to write the MSI message into the device. This is the same functionality as platform-MSI but it avoids all the down sides of platform MSI, i.e. the extra ID book keeping, the special data structure in the msi descriptor. 
Further the domains are only created when the devices are really in use, so the burden is on the usage and not on the infrastructure. Fill in the domain template and provide two functions to init/allocate and remove a per device MSI domain. Until all users and parent domain providers are converted, the init/alloc function invokes the original platform-MSI code when the irqdomain which is associated to the device does not provide MSI parent functionality yet. Signed-off-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240127161753.114685-6-apatel@ventanamicro.com --- drivers/base/platform-msi.c | 103 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/msi.h | 4 ++ 2 files changed, 107 insertions(+) (limited to 'drivers') diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index f37ad34c80ec..b56e919acabb 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -13,6 +13,8 @@ #include #include +/* Begin of removal area. Once everything is converted over. Cleanup the includes too! */ + #define DEV_ID_SHIFT 21 #define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT)) @@ -350,3 +352,104 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir return msi_domain_populate_irqs(domain->parent, dev, virq, nr_irqs, &data->arg); } + +/* End of removal area */ + +/* Real per device domain interfaces */ + +/* + * This indirection can go when platform_device_msi_init_and_alloc_irqs() + * is switched to a proper irq_chip::irq_write_msi_msg() callback. Keep it + * simple for now. 
+ */ +static void platform_msi_write_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + irq_write_msi_msg_t cb = d->chip_data; + + cb(irq_data_get_msi_desc(d), msg); +} + +static void platform_msi_set_desc_byindex(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; + arg->hwirq = desc->msi_index; +} + +static const struct msi_domain_template platform_msi_template = { + .chip = { + .name = "pMSI", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_write_msi_msg = platform_msi_write_msi_msg, + /* The rest is filled in by the platform MSI parent */ + }, + + .ops = { + .set_desc = platform_msi_set_desc_byindex, + }, + + .info = { + .bus_token = DOMAIN_BUS_DEVICE_MSI, + }, +}; + +/** + * platform_device_msi_init_and_alloc_irqs - Initialize platform device MSI + * and allocate interrupts for @dev + * @dev: The device for which to allocate interrupts + * @nvec: The number of interrupts to allocate + * @write_msi_msg: Callback to write an interrupt message for @dev + * + * Returns: + * Zero for success, or an error code in case of failure + * + * This creates a MSI domain on @dev which has @dev->msi.domain as + * parent. The parent domain sets up the new domain. The domain has + * a fixed size of @nvec. The domain is managed by devres and will + * be removed when the device is removed. + * + * Note: For migration purposes this falls back to the original platform_msi code + * up to the point where all platforms have been converted to the MSI + * parent model. + */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg) +{ + struct irq_domain *domain = dev->msi.domain; + + if (!domain || !write_msi_msg) + return -EINVAL; + + /* Migration support. 
Will go away once everything is converted */ + if (!irq_domain_is_msi_parent(domain)) + return platform_msi_domain_alloc_irqs(dev, nvec, write_msi_msg); + + /* + * @write_msi_msg is stored in the resulting msi_domain_info::data. + * The underlying domain creation mechanism will assign that + * callback to the resulting irq chip. + */ + if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, + &platform_msi_template, + nvec, NULL, write_msi_msg)) + return -ENODEV; + + return msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, nvec - 1); +} +EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs); + +/** + * platform_device_msi_free_irqs_all - Free all interrupts for @dev + * @dev: The device for which to free interrupts + */ +void platform_device_msi_free_irqs_all(struct device *dev) +{ + struct irq_domain *domain = dev->msi.domain; + + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); + + /* Migration support. Will go away once everything is converted */ + if (!irq_domain_is_msi_parent(domain)) + platform_msi_free_priv_data(dev); +} +EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all); diff --git a/include/linux/msi.h b/include/linux/msi.h index d5d1513ef4d6..ef167961c782 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -664,6 +664,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); +/* Per device platform MSI */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_device_msi_free_irqs_all(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev); #else /* CONFIG_GENERIC_MSI_IRQ */ -- cgit v1.2.3 From 14fd06c776b5289a43c91cdc64bac3bdbc7b397e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2024 21:47:34 +0530 Subject: irqchip: 
Convert all platform MSI users to the new API Switch all the users of the platform MSI domain over to invoke the new interfaces which branch to the original platform MSI functions when the irqdomain associated to the caller device does not yet provide MSI parent functionality. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240127161753.114685-7-apatel@ventanamicro.com --- drivers/dma/mv_xor_v2.c | 8 ++++---- drivers/dma/qcom/hidma.c | 6 +++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 5 +++-- drivers/mailbox/bcm-flexrm-mailbox.c | 8 ++++---- drivers/perf/arm_smmuv3_pmu.c | 4 ++-- drivers/ufs/host/ufs-qcom.c | 8 ++++---- 6 files changed, 20 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 1ebfbe88e733..97ebc791a30b 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -747,8 +747,8 @@ static int mv_xor_v2_probe(struct platform_device *pdev) if (IS_ERR(xor_dev->clk)) return PTR_ERR(xor_dev->clk); - ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1, - mv_xor_v2_set_msi_msg); + ret = platform_device_msi_init_and_alloc_irqs(&pdev->dev, 1, + mv_xor_v2_set_msi_msg); if (ret) return ret; @@ -851,7 +851,7 @@ free_hw_desq: xor_dev->desc_size * MV_XOR_V2_DESC_NUM, xor_dev->hw_desq_virt, xor_dev->hw_desq); free_msi_irqs: - platform_msi_domain_free_irqs(&pdev->dev); + platform_device_msi_free_irqs_all(&pdev->dev); return ret; } @@ -867,7 +867,7 @@ static void mv_xor_v2_remove(struct platform_device *pdev) devm_free_irq(&pdev->dev, xor_dev->irq, xor_dev); - platform_msi_domain_free_irqs(&pdev->dev); + platform_device_msi_free_irqs_all(&pdev->dev); tasklet_kill(&xor_dev->irq_tasklet); } diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index d63b93dc7047..202ac95227cb 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -696,7 +696,7 @@ static void 
hidma_free_msis(struct hidma_dev *dmadev) devm_free_irq(dev, virq, &dmadev->lldev); } - platform_msi_domain_free_irqs(dev); + platform_device_msi_free_irqs_all(dev); #endif } @@ -706,8 +706,8 @@ static int hidma_request_msi(struct hidma_dev *dmadev, #ifdef CONFIG_GENERIC_MSI_IRQ int rc, i, virq; - rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS, - hidma_write_msi_msg); + rc = platform_device_msi_init_and_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS, + hidma_write_msi_msg); if (rc) return rc; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0ffb1cf17e0b..a74a509bcd63 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3125,7 +3125,8 @@ static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) static void arm_smmu_free_msis(void *data) { struct device *dev = data; - platform_msi_domain_free_irqs(dev); + + platform_device_msi_free_irqs_all(dev); } static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) @@ -3166,7 +3167,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) } /* Allocate MSIs for evtq, gerror and priq. 
Ignore cmdq */ - ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); + ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); if (ret) { dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n"); return; diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index e3e28a4f7d01..b1abc2a0c971 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -1587,8 +1587,8 @@ static int flexrm_mbox_probe(struct platform_device *pdev) } /* Allocate platform MSIs for each ring */ - ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings, - flexrm_mbox_msi_write); + ret = platform_device_msi_init_and_alloc_irqs(dev, mbox->num_rings, + flexrm_mbox_msi_write); if (ret) goto fail_destroy_cmpl_pool; @@ -1641,7 +1641,7 @@ skip_debugfs: fail_free_debugfs_root: debugfs_remove_recursive(mbox->root); - platform_msi_domain_free_irqs(dev); + platform_device_msi_free_irqs_all(dev); fail_destroy_cmpl_pool: dma_pool_destroy(mbox->cmpl_pool); fail_destroy_bd_pool: @@ -1657,7 +1657,7 @@ static void flexrm_mbox_remove(struct platform_device *pdev) debugfs_remove_recursive(mbox->root); - platform_msi_domain_free_irqs(dev); + platform_device_msi_free_irqs_all(dev); dma_pool_destroy(mbox->cmpl_pool); dma_pool_destroy(mbox->bd_pool); diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 6303b82566f9..9e5d7fa647b6 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -716,7 +716,7 @@ static void smmu_pmu_free_msis(void *data) { struct device *dev = data; - platform_msi_domain_free_irqs(dev); + platform_device_msi_free_irqs_all(dev); } static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) @@ -746,7 +746,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu) if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI)) return; - ret = platform_msi_domain_alloc_irqs(dev, 1, 
smmu_pmu_write_msi_msg); + ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg); if (ret) { dev_warn(dev, "failed to allocate MSIs\n"); return; diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 39eef470f8fa..8fde5204e88b 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1712,8 +1712,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) * 2. Poll queues do not need ESI. */ nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; - ret = platform_msi_domain_alloc_irqs(hba->dev, nr_irqs, - ufs_qcom_write_msi_msg); + ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs, + ufs_qcom_write_msi_msg); if (ret) { dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); return ret; @@ -1742,7 +1742,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) devm_free_irq(hba->dev, desc->irq, hba); } msi_unlock_descs(hba->dev); - platform_msi_domain_free_irqs(hba->dev); + platform_device_msi_free_irqs_all(hba->dev); } else { if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 && host->hw_ver.step == 0) @@ -1818,7 +1818,7 @@ static void ufs_qcom_remove(struct platform_device *pdev) pm_runtime_get_sync(&(pdev)->dev); ufshcd_remove(hba); - platform_msi_domain_free_irqs(hba->dev); + platform_device_msi_free_irqs_all(hba->dev); } static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = { -- cgit v1.2.3 From 1a4671ff7a903e87e4e76213e200bb8bcfa942e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Feb 2024 16:35:43 +0100 Subject: platform-msi: Remove unused interfaces Signed-off-by: Thomas Gleixner --- drivers/base/platform-msi.c | 16 ++-------------- include/linux/msi.h | 3 --- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index b56e919acabb..0d01890160f3 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -206,8 +206,8 @@ static 
void platform_msi_free_priv_data(struct device *dev) * Returns: * Zero for success, or an error code in case of failure */ -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg) +static int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg) { int err; @@ -221,18 +221,6 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, return err; } -EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs); - -/** - * platform_msi_domain_free_irqs - Free MSI interrupts for @dev - * @dev: The device for which to free interrupts - */ -void platform_msi_domain_free_irqs(struct device *dev) -{ - msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); - platform_msi_free_priv_data(dev); -} -EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs); /** * platform_msi_get_host_data - Query the private data associated with diff --git a/include/linux/msi.h b/include/linux/msi.h index ef167961c782..b0842ea55bde 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -635,9 +635,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg); -void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, -- cgit v1.2.3 From 7e3ec6286753b404666af9a58d283690302c9321 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:10 +0100 Subject: x86/cpu/amd: Provide a separate accessor for Node ID AMD (ab)uses topology_die_id() to store the Node ID information and topology_max_dies_per_pkg to store the number of nodes per package. 
This collides with the proper processor die level enumeration which is coming on AMD with CPUID 8000_0026, unless there is a correlation between the two. There is zero documentation about that. So provide new storage and new accessors which for now still access die_id and topology_max_die_per_pkg(). Will be mopped up after AMD and HYGON are converted over. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.956116738@linutronix.de --- arch/x86/events/amd/core.c | 2 +- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/topology.h | 8 ++++++++ arch/x86/kernel/amd_nb.c | 4 ++-- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/mce/amd.c | 4 ++-- arch/x86/kernel/cpu/mce/inject.c | 4 ++-- drivers/edac/amd64_edac.c | 4 ++-- drivers/edac/mce_amd.c | 4 ++-- 9 files changed, 23 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 81f6d8275b6b..69a3b02e50bb 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -579,7 +579,7 @@ static void amd_pmu_cpu_starting(int cpu) if (!x86_pmu.amd_nb_constraints) return; - nb_id = topology_die_id(cpu); + nb_id = topology_amd_node_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); for_each_online_cpu(i) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 26620d7642a9..26a6001ddafd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -100,6 +100,9 @@ struct cpuinfo_topology { u32 logical_pkg_id; u32 logical_die_id; + // AMD Node ID and Nodes per Package info + u32 amd_node_id; + // Cache level topology IDs u32 llc_id; u32 l2c_id; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index fa5d803ed7e2..1fd12e98a283 100644 --- a/arch/x86/include/asm/topology.h +++ 
b/arch/x86/include/asm/topology.h @@ -131,6 +131,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) + extern unsigned int __max_die_per_package; #ifdef CONFIG_SMP @@ -161,6 +163,11 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +static inline unsigned int topology_amd_nodes_per_pkg(void) +{ + return __max_die_per_package; +} + extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -182,6 +189,7 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; }; #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 053f6dcc6b2c..5bf5f9fc5753 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -386,7 +386,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res) int amd_get_subcaches(int cpu) { - struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link; + struct pci_dev *link = node_to_amd_nb(topology_amd_node_id(cpu))->link; unsigned int mask; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) @@ -400,7 +400,7 @@ int amd_get_subcaches(int cpu) int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; - struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu)); + struct amd_northbridge *nb = 
node_to_amd_nb(topology_amd_node_id(cpu)); unsigned int reg; int cuid; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c131c412db89..4a33218cb103 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -595,7 +595,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) if (index < 3) return; - node = topology_die_id(smp_processor_id()); + node = topology_amd_node_id(smp_processor_id()); this_leaf->nb = node_to_amd_nb(node); if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) amd_calc_l3_indices(this_leaf->nb); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 2b46eb0fdf3a..9a0133ef7e20 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1231,7 +1231,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, return -ENODEV; if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_die_id(cpu)); + nb = node_to_amd_nb(topology_amd_node_id(cpu)); /* threshold descriptor already initialized on this node? */ if (nb && nb->bank4) { @@ -1335,7 +1335,7 @@ static void threshold_remove_bank(struct threshold_bank *bank) * The last CPU on this node using the shared bank is going * away, remove that bank now. 
*/ - nb = node_to_amd_nb(topology_die_id(smp_processor_id())); + nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id())); nb->bank4 = NULL; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 72f0695c3dc1..308c5b5e0bbe 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -543,8 +543,8 @@ static void do_inject(void) if (boot_cpu_has(X86_FEATURE_AMD_DCM) && b == 4 && boot_cpu_data.x86 < 0x17) { - toggle_nb_mca_mst_cpu(topology_die_id(cpu)); - cpu = get_nbc_for_node(topology_die_id(cpu)); + toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu)); + cpu = get_nbc_for_node(topology_amd_node_id(cpu)); } cpus_read_lock(); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 537b9987a431..2b8c20bb926a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1915,7 +1915,7 @@ ddr3: /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - u16 mce_nid = topology_die_id(m->extcpu); + u16 mce_nid = topology_amd_node_id(m->extcpu); struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; @@ -3446,7 +3446,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) int cpu; for_each_online_cpu(cpu) - if (topology_die_id(cpu) == nid) + if (topology_amd_node_id(cpu) == nid) cpumask_set_cpu(cpu, mask); } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ec8b6c9fedfd..8130c3dc64da 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -584,7 +584,7 @@ static void decode_mc3_mce(struct mce *m) static void decode_mc4_mce(struct mce *m) { unsigned int fam = x86_family(m->cpuid); - int node_id = topology_die_id(m->extcpu); + int node_id = topology_amd_node_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u8 offset = 0; @@ -746,7 +746,7 @@ static void decode_smca_error(struct mce *m) if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 
0 && decode_dram_ecc) - decode_dram_ecc(topology_die_id(m->extcpu), m); + decode_dram_ecc(topology_amd_node_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) -- cgit v1.2.3 From bd745d1c41e7fa56242889eb5dc6df2d7dd5df32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:13 +0100 Subject: x86/cpu/topology: Rename topology_max_die_per_package() The plural of die is dies. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.065874205@linutronix.de --- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/uncore.c | 2 +- arch/x86/events/intel/uncore_snbep.c | 2 +- arch/x86/events/rapl.c | 2 +- arch/x86/include/asm/topology.h | 2 +- drivers/hwmon/coretemp.c | 2 +- drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c | 2 +- drivers/powercap/intel_rapl_common.c | 2 +- drivers/thermal/intel/intel_hfi.c | 2 +- drivers/thermal/intel/intel_powerclamp.c | 2 +- drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4b50a3a9818a..326c8cd5aa2d 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -834,7 +834,7 @@ static int __init cstate_init(void) } if (has_cstate_pkg) { - if (topology_max_die_per_package() > 1) { + if (topology_max_dies_per_package() > 1) { err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7927c0b832fa..258e2cdf28fa 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1893,7 +1893,7 @@ static int __init intel_uncore_init(void) return -ENODEV; __uncore_max_dies = - topology_max_packages() * topology_max_die_per_package(); + topology_max_packages() * topology_max_dies_per_package(); id = 
x86_match_cpu(intel_uncore_match); if (!id) { diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index a96496bef678..3f6bd3e4a763 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1406,7 +1406,7 @@ static int topology_gidnid_map(int nodeid, u32 gidnid) */ for (i = 0; i < 8; i++) { if (nodeid == GIDNIDMAP(gidnid, i)) { - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) die_id = i; else die_id = topology_phys_to_logical_pkg(i); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 8d98d468b976..fb2b1961e5a3 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -674,7 +674,7 @@ static const struct attribute_group *rapl_attr_update[] = { static int __init init_rapl_pmus(void) { - int maxdie = topology_max_packages() * topology_max_die_per_package(); + int maxdie = topology_max_packages() * topology_max_dies_per_package(); size_t size; size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f9eb7a7831f0..6a71794bd4e2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -152,7 +152,7 @@ static inline unsigned int topology_max_packages(void) return __max_logical_packages; } -static inline unsigned int topology_max_die_per_package(void) +static inline unsigned int topology_max_dies_per_package(void) { return __max_dies_per_package; } diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index b8fc8d1ef20d..b0991dde2e59 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -782,7 +782,7 @@ static int __init coretemp_init(void) if (!x86_match_cpu(coretemp_ids)) return -ENODEV; - max_zones = topology_max_packages() * topology_max_die_per_package(); + max_zones = topology_max_packages() * topology_max_dies_per_package(); zone_devices = kcalloc(max_zones, sizeof(struct 
platform_device *), GFP_KERNEL); if (!zone_devices) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index a5e0f5c22179..b89c0dda9e5d 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -242,7 +242,7 @@ static int __init intel_uncore_init(void) return -ENODEV; uncore_max_entries = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); uncore_instances = kcalloc(uncore_max_entries, sizeof(*uncore_instances), GFP_KERNEL); if (!uncore_instances) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 2feed036c1cd..00c861899a47 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1564,7 +1564,7 @@ struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id if (id_is_cpu) { rp->id = topology_logical_die_id(id); rp->lead_cpu = id; - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", topology_physical_package_id(id), topology_die_id(id)); else diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 3b04c6ec4fca..40d664a66cdc 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -607,7 +607,7 @@ void __init intel_hfi_init(void) /* There is one HFI instance per die/package. */ max_hfi_instances = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); /* * This allocation may fail. 
CPU hotplug callbacks must check diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index bc6eb0dd66a4..4ba649370aa1 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -587,7 +587,7 @@ static int powerclamp_idle_injection_register(void) poll_pkg_cstate_enable = false; if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); - if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + if (topology_max_packages() == 1 && topology_max_dies_per_package() == 1) poll_pkg_cstate_enable = true; } else { ii_dev = idle_inject_register(idle_injection_cpu_mask); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 11a7f8108bbb..f6c2e5964b8f 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -494,7 +494,7 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; - max_id = topology_max_packages() * topology_max_die_per_package(); + max_id = topology_max_packages() * topology_max_dies_per_package(); zones = kcalloc(max_id, sizeof(struct zone_device *), GFP_KERNEL); if (!zones) -- cgit v1.2.3 From d3d17e23d1a0d1f959b4fa55b35f1802d9c584fa Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 12 Feb 2024 13:03:34 +0200 Subject: thunderbolt: Fix NULL pointer dereference in tb_port_update_credits() Olliver reported that his system crashes when plugging in Thunderbolt 1 device: BUG: kernel NULL pointer dereference, address: 0000000000000020 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:tb_port_do_update_credits+0x1b/0x130 [thunderbolt] Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? 
exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? tb_port_do_update_credits+0x1b/0x130 ? tb_switch_update_link_attributes+0x83/0xd0 tb_switch_add+0x7a2/0xfe0 tb_scan_port+0x236/0x6f0 tb_handle_hotplug+0x6db/0x900 process_one_work+0x171/0x340 worker_thread+0x27b/0x3a0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This is due the fact that some Thunderbolt 1 devices only have one lane adapter. Fix this by checking for the lane 1 before we read its credits. Reported-by: Olliver Schinagl Closes: https://lore.kernel.org/linux-usb/c24c7882-6254-4e68-8f22-f3e8f65dc84f@schinagl.nl/ Fixes: 81af2952e606 ("thunderbolt: Add support for asymmetric link") Cc: stable@vger.kernel.org Cc: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 900114ba4371..fad40c4bc710 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1249,6 +1249,9 @@ int tb_port_update_credits(struct tb_port *port) ret = tb_port_do_update_credits(port); if (ret) return ret; + + if (!port->dual_link_port) + return 0; return tb_port_do_update_credits(port->dual_link_port); } -- cgit v1.2.3 From d4c08d8b23b22807c712208cd05cb047e92e7672 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 Feb 2024 15:38:24 +0200 Subject: phy: qcom-qmp-usb: fix v3 offsets data The MSM8996 platform has registers setup different to the rest of QMP v3 USB platforms. It has PCS region at 0x600 and no PCS_MISC region, while other platforms have PCS region at 0x800 and PCS_MISC at 0x600. This results in the malfunctioning USB host on some of the platforms. 
The commit f74c35b630d4 ("phy: qcom-qmp-usb: fix register offsets for ipq8074/ipq6018") fixed the issue for IPQ platforms, but missed the SDM845 which has the same register layout. To simplify future platform addition and to make the driver more future proof, rename qmp_usb_offsets_v3 to qmp_usb_offsets_v3_msm8996 (to mark its peculiarity), rename qmp_usb_offsets_ipq8074 to qmp_usb_offsets_v3 and use it for SDM845 platform. Fixes: 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240213133824.2218916-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 6621246e4ddf..5c003988c35d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1556,7 +1556,7 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; -static const struct qmp_usb_offsets qmp_usb_offsets_ipq8074 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { .serdes = 0, .pcs = 0x800, .pcs_misc = 0x600, @@ -1572,7 +1572,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = { .rx = 0x400, }; -static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = { .serdes = 0, .pcs = 0x600, .tx = 0x200, @@ -1624,7 +1624,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq9574_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl), @@ -1642,7 +1642,7 @@ static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 
1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq8074_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq8074_usb3_serdes_tbl), @@ -1678,7 +1678,7 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = { static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_v3, + .offsets = &qmp_usb_offsets_v3_msm8996, .serdes_tbl = msm8996_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(msm8996_usb3_serdes_tbl), -- cgit v1.2.3 From ecec7c9f29a7114a3e23a14020b1149ea7dffb4f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 14 Feb 2024 18:49:31 -0800 Subject: dmaengine: idxd: Remove shadow Event Log head stored in idxd head is defined in idxd->evl as a shadow of head in the EVLSTATUS register. There are two issues related to the shadow head: 1. Mismatch between the shadow head and the state of the EVLSTATUS register: If Event Log is supported, upon completion of the Enable Device command, the Event Log head in the variable idxd->evl->head should be cleared to match the state of the EVLSTATUS register. But the variable is not reset currently, leading mismatch between the variable and the register state. The mismatch causes incorrect processing of Event Log entries. 2. Unnecessary shadow head definition: The shadow head is unnecessary as head can be read directly from the EVLSTATUS register. Reading head from the register incurs no additional cost because event log head and tail are always read together and tail is already read directly from the register as required by hardware. Remove the shadow Event Log head stored in idxd->evl to address the mentioned issues. 
Fixes: 244da66cda35 ("dmaengine: idxd: setup event log configuration") Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240215024931.1739621-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 2 +- drivers/dma/idxd/debugfs.c | 2 +- drivers/dma/idxd/idxd.h | 1 - drivers/dma/idxd/irq.c | 3 +-- 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 77f8885cf407..e5a94a93a3cc 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -345,7 +345,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) spin_lock(&evl->lock); status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = status.tail; - h = evl->head; + h = status.head; size = evl->size; while (h != t) { diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c index 9cfbd9b14c4c..f3f25ee676f3 100644 --- a/drivers/dma/idxd/debugfs.c +++ b/drivers/dma/idxd/debugfs.c @@ -68,9 +68,9 @@ static int debugfs_evl_show(struct seq_file *s, void *d) spin_lock(&evl->lock); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; evl_size = evl->size; seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 47de3f93ff1e..d0f5db6cf1ed 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -300,7 +300,6 @@ struct idxd_evl { unsigned int log_size; /* The number of entries in the event log. 
*/ u16 size; - u16 head; unsigned long *bmap; bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index c8a0aa874b11..348aa21389a9 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -367,9 +367,9 @@ static void process_evl_entries(struct idxd_device *idxd) /* Clear interrupt pending bit */ iowrite32(evl_status.bits_upper32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; size = idxd->evl->size; while (h != t) { @@ -378,7 +378,6 @@ static void process_evl_entries(struct idxd_device *idxd) h = (h + 1) % size; } - evl->head = h; evl_status.head = h; iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); spin_unlock(&evl->lock); -- cgit v1.2.3 From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 7 Feb 2024 14:47:32 -0500 Subject: dmaengine: fsl-edma: correct max_segment_size setting Correcting the previous setting of 0x3fff to the actual value of 0x7fff. Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and utilization of FIELD_GET to obtain the accurate maximum value. 
Cc: stable@vger.kernel.org Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.h | 5 +++-- drivers/dma/fsl-edma-main.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index bb5221158a77..f5e216b157c7 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -30,8 +30,9 @@ #define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8) #define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11) -#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0)) -#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0)) +#define EDMA_TCD_ITER_MASK GENMASK(14, 0) +#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK) +#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK) #define EDMA_TCD_CSR_START BIT(0) #define EDMA_TCD_CSR_INT_MAJOR BIT(1) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 45cc419b1b4a..d36e28b9c767 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev) DMAENGINE_ALIGN_32_BYTES; /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */ - dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff); + dma_set_max_seg_size(fsl_edma->dma_dev.dev, + FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK)); fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; -- cgit v1.2.3 From 89b0f15f408f7c4ee98c1ec4c3224852fcbc3274 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:16 +0100 Subject: x86/cpu/topology: Get rid of cpuinfo::x86_max_cores Now that __num_cores_per_package and __num_threads_per_package are available, cpuinfo::x86_max_cores and the 
related math all over the place can be replaced with the ready to consume data. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.176147806@linutronix.de --- Documentation/arch/x86/topology.rst | 24 +++++++++--------------- arch/x86/events/intel/uncore_nhmex.c | 4 ++-- arch/x86/events/intel/uncore_snb.c | 8 ++++---- arch/x86/events/intel/uncore_snbep.c | 16 ++++++++-------- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/debugfs.c | 3 ++- arch/x86/kernel/cpu/mce/inject.c | 3 +-- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/topology_common.c | 3 --- arch/x86/kernel/smpboot.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/hwmon/fam15h_power.c | 2 +- 14 files changed, 31 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 08ebf9edbfc1..7352ab89a55a 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: - - cpuinfo_x86.x86_max_cores: + - topology_num_threads_per_package() - The number of cores in a package. This information is retrieved via CPUID. + The number of threads in a package. - - cpuinfo_x86.x86_max_dies: + - topology_num_cores_per_package() - The number of dies in a package. This information is retrieved via CPUID. + The number of cores in a package. + + - topology_max_dies_per_package() + + The maximum number of dies in a package. - cpuinfo_x86.topo.die_id: - The physical ID of the die. This information is retrieved via CPUID. + The physical ID of the die. - cpuinfo_x86.topo.pkg_id: @@ -96,16 +100,6 @@ are SMT- or CMT-type threads. AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses "core". 
-Core-related topology information in the kernel: - - - smp_num_siblings: - - The number of threads in a core. The number of threads in a package can be - calculated by:: - - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings - - Threads ======= A thread is a single scheduling unit. It's the equivalent to a logical Linux diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb..92da8aaa5966 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package()) + nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = nhmex_msr_uncores; } /* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 7fd4334e12a1..9462fd9f3b7a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = { void snb_uncore_cpu_init(void) { uncore_msr_uncores = snb_msr_uncores; - if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snb_uncore_cbox.num_boxes = topology_num_cores_per_package(); } static void skl_uncore_msr_init_box(struct intel_uncore_box *box) @@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = { void skl_uncore_cpu_init(void) { uncore_msr_uncores = skl_msr_uncores; - if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if 
(skl_uncore_cbox.num_boxes > topology_num_cores_per_package()) + skl_uncore_cbox.num_boxes = topology_num_cores_per_package(); snb_uncore_arb.ops = &skl_uncore_msr_ops; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3f6bd3e4a763..2eaf0f339849 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1172,8 +1172,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { void snbep_uncore_cpu_init(void) { - if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = snbep_msr_uncores; } @@ -1845,8 +1845,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = { void ivbep_uncore_cpu_init(void) { - if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = ivbep_msr_uncores; } @@ -2917,8 +2917,8 @@ static bool hswep_has_limit_sbox(unsigned int device) void hswep_uncore_cpu_init(void) { - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + hswep_uncore_cbox.num_boxes = topology_num_cores_per_package(); /* Detect 6-8 core systems with only two SBOXes */ if (hswep_has_limit_sbox(HSWEP_PCU_DID)) @@ -3280,8 +3280,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package()) + 
bdx_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index de1648ee2b9e..326581df4846 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -149,8 +149,6 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; - /* cpuid returned max cores value: */ - u16 x86_max_cores; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e1d118e6926e..f2241e7e96fd 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9a1014386a9..05e0b31f75e9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1738,7 +1738,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_stepping = 0; /* So far unknown... 
*/ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index f40f3eecebc6..3baf3e435834 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); - seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "num_threads: %u\n", __num_threads_per_package); + seq_printf(m, "num_cores: %u\n", __num_cores_per_package); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1e327881073f..94953d749475 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -430,10 +430,9 @@ static void trigger_thr_int(void *info) static u32 get_nbc_for_node(int node_id) { - struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); + cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 857e608af641..5f0414452b67 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -641,7 +641,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; - do_div(llc_size, c->x86_max_cores); + do_div(llc_size, topology_num_cores_per_package()); llc_size_per_core = (unsigned 
int)llc_size; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a2c3f8f5886d..a50ae8d63d1c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -155,9 +155,6 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Maximum number of cores on this package */ - c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); - c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 35c272cb45fb..9c1e1219c28f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -564,7 +564,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; - bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; + bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2ff6deedef95..da1f43999d09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -451,7 +451,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ - smu->cpu_core_num = boot_cpu_data.x86_max_cores; + smu->cpu_core_num = topology_num_cores_per_package(); #else smu->cpu_core_num = 4; #endif diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 6307112c2c0c..9ed2c4b6734e 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev, * With the new x86 topology modelling, x86_max_cores is the * 
compute unit number. */ - cu_num = boot_cpu_data.x86_max_cores; + cu_num = topology_num_cores_per_package(); ret = read_registers(data); if (ret) -- cgit v1.2.3 From 2df70149e73e79783bcbc7db4fa51ecef0e2022c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Feb 2024 16:51:33 +0100 Subject: power: supply: bq27xxx-i2c: Do not free non existing IRQ The bq27xxx i2c-client may not have an IRQ, in which case client->irq will be 0. bq27xxx_battery_i2c_probe() already has an if (client->irq) check wrapping the request_threaded_irq(). But bq27xxx_battery_i2c_remove() unconditionally calls free_irq(client->irq) leading to: [ 190.310742] ------------[ cut here ]------------ [ 190.310843] Trying to free already-free IRQ 0 [ 190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310 Followed by a backtrace when unbinding the driver. Add an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring probe() to fix this. Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 3a1798b0c1a7..9910c600743e 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); -- cgit v1.2.3 From c83ccdc9586b3e9882da9e27507c046751999d59 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Fri, 9 Feb 2024 16:50:34 +0100 Subject: counter: fix privdata alignment 
Aligning to the L1 cache does not guarantee the same alignment as kmallocing an object [1]. Furthermore, on some platforms, that alignment is not sufficient for DMA safety (in case someone wants to have a DMA safe buffer in privdata) [2]. Some time ago, we had the same fixes in IIO. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/base/devres.c#n35 [2]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: c18e2760308e ("counter: Provide alternative counter registration functions") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240209-counter-align-fix-v2-1-5777ea0a2722@analog.com Signed-off-by: William Breathitt Gray --- drivers/counter/counter-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c index 09c77afb33ca..3f24481fc04a 100644 --- a/drivers/counter/counter-core.c +++ b/drivers/counter/counter-core.c @@ -31,10 +31,11 @@ struct counter_device_allochelper { struct counter_device counter; /* - * This is cache line aligned to ensure private data behaves like if it - * were kmalloced separately. + * This ensures private data behaves like if it were kmalloced + * separately. Also ensures the minimum alignment for safe DMA + * operations (which may or may not mean cache alignment). */ - unsigned long privdata[] ____cacheline_aligned; + unsigned long privdata[] __aligned(ARCH_DMA_MINALIGN); }; static void counter_device_release(struct device *dev) -- cgit v1.2.3 From 31edf4bbe0ba27fd03ac7d87eb2ee3d2a231af6d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 17 Feb 2024 20:25:38 -0800 Subject: nbd: null check for nla_nest_start nla_nest_start() may fail and return NULL. Insert a check and set errno based on other call sites within the same source code.
Signed-off-by: Navid Emamdoost Reviewed-by: Michal Kubecek Fixes: 47d902b90a32 ("nbd: add a status netlink command") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240218042534.it.206-kees@kernel.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 30ae3cc12e77..d2b422d842b7 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2433,6 +2433,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) } dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { -- cgit v1.2.3 From 7a9b9012043e126f6d6f4683e67409312d1b707b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:11 +0200 Subject: mei: me: add arrow lake point S DID Add Arrow Lake S device id. 
Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 961e5d53a27a..b10536e4974d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -112,6 +112,7 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 676d566f38dd..1a614fb7fdb6 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -119,6 +119,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- cgit v1.2.3 From 8436f25802ec028ac7254990893f3e01926d9b79 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 11 Feb 2024 12:39:12 +0200 Subject: mei: me: add arrow lake point H DID Add Arrow Lake H device id. 
Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240211103912.117105-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index b10536e4974d..aac36750d2c5 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -113,6 +113,7 @@ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ #define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 1a614fb7fdb6..8cf636c54032 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -120,6 +120,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } -- cgit v1.2.3 From daaf5286b6d2528a73c651aa2d4059bc1bd67c2e Mon Sep 17 00:00:00 2001 From: Wentong Wu Date: Wed, 7 Feb 2024 08:43:04 +0800 Subject: mei: Add Meteor Lake support for IVSC device Add IVSC device support on Meteor Lake platform. 
Signed-off-by: Wentong Wu Cc: stable Reviewed-by: Sakari Ailus Acked-by: Tomas Winkler Link: https://lore.kernel.org/r/20240207004304.31862-1-wentong.wu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-tp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 6f4a4be6ccb5..55f7db490d3b 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -535,6 +535,7 @@ static const struct acpi_device_id vsc_tp_acpi_ids[] = { { "INTC1009" }, /* Raptor Lake */ { "INTC1058" }, /* Tiger Lake */ { "INTC1094" }, /* Alder Lake */ + { "INTC10D0" }, /* Meteor Lake */ {} }; MODULE_DEVICE_TABLE(acpi, vsc_tp_acpi_ids); -- cgit v1.2.3 From 74fa8f9c553f7b5ccab7d103acae63cc2e080465 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:47 +0100 Subject: block: pass a queue_limits argument to blk_alloc_disk Pass a queue_limits to blk_alloc_disk and apply it if non-NULL. This will allow allocating queues with valid queue limits instead of setting the values one at a time later. Also change blk_alloc_disk to return an ERR_PTR instead of just NULL which can't distinguish errors. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-2-hch@lst.de Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 6 ++++-- arch/xtensa/platforms/iss/simdisk.c | 8 +++++--- block/genhd.c | 11 ++++++----- drivers/block/brd.c | 7 ++++--- drivers/block/drbd/drbd_main.c | 6 ++++-- drivers/block/n64cart.c | 6 ++++-- drivers/block/null_blk/main.c | 7 ++++--- drivers/block/pktcdvd.c | 7 ++++--- drivers/block/ps3vram.c | 6 +++--- drivers/block/zram/zram_drv.c | 6 +++--- drivers/md/bcache/super.c | 4 ++-- drivers/md/dm.c | 4 ++-- drivers/md/md.c | 7 ++++--- drivers/nvdimm/btt.c | 8 ++++---- drivers/nvdimm/pmem.c | 6 +++--- drivers/nvme/host/multipath.c | 6 +++--- drivers/s390/block/dcssblk.c | 6 +++--- include/linux/blkdev.h | 10 +++++++--- 18 files changed, 69 insertions(+), 52 deletions(-) (limited to 'drivers') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index a708fbd5a844..539ff56b6968 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -117,9 +117,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) dev->bsize = bsize; dev->bshift = ffs(bsize) - 10; - dev->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!dev->disk) + dev->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(dev->disk)) { + err = PTR_ERR(dev->disk); goto free_dev; + } dev->disk->major = major_num; dev->disk->first_minor = dev_id * 16; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 178cf96ca10a..defc67909a9c 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -264,16 +264,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which, struct proc_dir_entry *procdir) { char tmp[2] = { '0' + which, 0 }; - int err = -ENOMEM; + int err; dev->fd = -1; dev->filename = NULL; spin_lock_init(&dev->lock); dev->users = 0; - dev->gd = blk_alloc_disk(NUMA_NO_NODE); - if 
(!dev->gd) + dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(dev->gd)) { + err = PTR_ERR(dev->gd); goto out; + } dev->gd->major = simdisk_major; dev->gd->first_minor = which; dev->gd->minors = SIMDISK_MINORS; diff --git a/block/genhd.c b/block/genhd.c index 7a8fd57c51f7..84c822d989da 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1391,20 +1391,21 @@ out_free_disk: return NULL; } -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass) { - struct queue_limits lim = { }; + struct queue_limits default_lim = { }; struct request_queue *q; struct gendisk *disk; - q = blk_alloc_queue(&lim, node); + q = blk_alloc_queue(lim ? lim : &default_lim, node); if (IS_ERR(q)) - return NULL; + return ERR_CAST(q); disk = __alloc_disk_node(q, node, lkclass); if (!disk) { blk_put_queue(q); - return NULL; + return ERR_PTR(-ENOMEM); } set_bit(GD_OWNS_QUEUE, &disk->state); return disk; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 970bd6ff38c4..689a3c0c31f8 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -335,10 +335,11 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = brd->brd_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_free_dev; - + } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; disk->minors = max_part; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6bc86106c7b2..cea1e537fd56 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2708,9 +2708,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if 
(IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_no_disk; + } device->vdisk = disk; device->rq_queue = disk->queue; diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index d914156db2d8..c64d7ee7a44d 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -131,9 +131,11 @@ static int __init n64cart_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out; + } disk->first_minor = 0; disk->flags = GENHD_FL_NO_PART; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index eeb895ec6f34..baf2b228d008 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2154,10 +2154,11 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q = nullb->disk->queue; } else if (dev->queue_mode == NULL_Q_BIO) { - rv = -ENOMEM; - nullb->disk = blk_alloc_disk(nullb->dev->home_node); - if (!nullb->disk) + nullb->disk = blk_alloc_disk(NULL, nullb->dev->home_node); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); goto out_cleanup_queues; + } nullb->q = nullb->disk->queue; rv = init_driver_queues(nullb); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d56d972aadb3..abb82926b1c9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2673,10 +2673,11 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_on = write_congestion_on; pd->write_congestion_off = write_congestion_off; - ret = -ENOMEM; - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); goto out_mem; + } pd->disk = disk; disk->major = pktdev_major; disk->first_minor = idx; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 38d42af01b25..bdcf083b45e2 100644 --- a/drivers/block/ps3vram.c +++ 
b/drivers/block/ps3vram.c @@ -730,10 +730,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - gendisk = blk_alloc_disk(NUMA_NO_NODE); - if (!gendisk) { + gendisk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(gendisk)) { dev_err(&dev->core, "blk_alloc_disk failed\n"); - error = -ENOMEM; + error = PTR_ERR(gendisk); goto out_cache_cleanup; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6772e0c654fa..84982221fc66 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2195,11 +2195,11 @@ static int zram_add(void) #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!zram->disk) { + zram->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; + ret = PTR_ERR(zram->disk); goto out_free_idr; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dc3f50f69714..9955ecff3839 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -935,8 +935,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) goto out_ida_remove; - d->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!d->disk) + d->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(d->disk)) goto out_bioset_exit; set_capacity(d->disk, sectors); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8dcabf84d866..b5e6a10b9cfd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2098,8 +2098,8 @@ static struct mapped_device *alloc_dev(int minor) * established. If request-based table is loaded: blk-mq will * override accordingly. 
*/ - md->disk = blk_alloc_disk(md->numa_node_id); - if (!md->disk) + md->disk = blk_alloc_disk(NULL, md->numa_node_id); + if (IS_ERR(md->disk)) goto bad; md->queue = md->disk->queue; diff --git a/drivers/md/md.c b/drivers/md/md.c index e2a5f513dbb7..75266c34b1f9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5763,10 +5763,11 @@ struct mddev *md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - error = -ENOMEM; - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + error = PTR_ERR(disk); goto out_free_mddev; + } disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index bb3726b622ad..9a0eae01d598 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1496,11 +1496,11 @@ static int btt_blk_init(struct btt *btt) { struct nd_btt *nd_btt = btt->nd_btt; struct nd_namespace_common *ndns = nd_btt->ndns; - int rc = -ENOMEM; + int rc; - btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE); - if (!btt->btt_disk) - return -ENOMEM; + btt->btt_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(btt->btt_disk)) + return PTR_ERR(btt->btt_disk); nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); btt->btt_disk->first_minor = 0; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4e8fdcb3f1c8..3a5df8d467c5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -497,9 +497,9 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - disk = blk_alloc_disk(nid); - if (!disk) - return -ENOMEM; + disk = blk_alloc_disk(NULL, nid); + if (IS_ERR(disk)) + return PTR_ERR(disk); q = disk->queue; pmem->disk = disk; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 74de1e64aeea..dc5d0d0a82d0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -532,9 +532,9 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct 
nvme_ns_head *head) !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; - head->disk = blk_alloc_disk(ctrl->numa_node); - if (!head->disk) - return -ENOMEM; + head->disk = blk_alloc_disk(NULL, ctrl->numa_node); + if (IS_ERR(head->disk)) + return PTR_ERR(head->disk); head->disk->fops = &nvme_ns_head_ops; head->disk->private_data = head; sprintf(head->disk->disk_name, "nvme%dn%d", diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 4b7ecd4fd431..0903b432ea97 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -629,9 +629,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->dev.release = dcssblk_release_segment; dev_info->dev.groups = dcssblk_dev_attr_groups; INIT_LIST_HEAD(&dev_info->lh); - dev_info->gd = blk_alloc_disk(NUMA_NO_NODE); - if (dev_info->gd == NULL) { - rc = -ENOMEM; + dev_info->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(dev_info->gd)) { + rc = PTR_ERR(dev_info->gd); goto seg_list_del; } dev_info->gd->major = dcssblk_major; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 45746ba73670..a14ea9344138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -766,22 +766,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. 
+ * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, -- cgit v1.2.3 From b5baaba4ce5c8a0e36b5232b16c0731e3eb0d939 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:49 +0100 Subject: brd: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/brd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 689a3c0c31f8..e322cef6596b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -318,6 +318,16 @@ static int brd_alloc(int i) struct gendisk *disk; char buf[DISK_NAME_LEN]; int err = -ENOMEM; + struct queue_limits lim = { + /* + * This is so fdisk will align partitions on 4k, because of + * direct_access API needing 4k alignment, returning a PFN + * (This is only a problem on very small devices <= 4M, + * otherwise fdisk will align on 1M. 
Regardless this call + * is harmless) + */ + .physical_block_size = PAGE_SIZE, + }; list_for_each_entry(brd, &brd_devices, brd_list) if (brd->brd_number == i) @@ -335,7 +345,7 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_dev; @@ -348,15 +358,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - /* - * This is so fdisk will align partitions on 4k, because of - * direct_access API needing 4k alignment, returning a PFN - * (This is only a problem on very small devices <= 4M, - * otherwise fdisk will align on 1M. Regardless this call - * is harmless) - */ - blk_queue_physical_block_size(disk->queue, PAGE_SIZE); - /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); -- cgit v1.2.3 From cc7f05c7ec0b26e1eda8ec7a99452032d08d305e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:50 +0100 Subject: n64cart: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index c64d7ee7a44d..27b2187e7a6d 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -114,6 +114,10 @@ static const struct block_device_operations n64cart_fops = { */ static int __init n64cart_probe(struct platform_device *pdev) { + struct queue_limits lim = { + .physical_block_size = 4096, + .logical_block_size = 4096, + }; struct gendisk *disk; int err = -ENOMEM; @@ -131,7 +135,7 @@ static int __init n64cart_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out; @@ -147,8 +151,6 @@ static int __init n64cart_probe(struct platform_device *pdev) set_disk_ro(disk, 1); blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_physical_block_size(disk->queue, 4096); - blk_queue_logical_block_size(disk->queue, 4096); err = add_disk(disk); if (err) -- cgit v1.2.3 From 4190b3f291d9563a438bf32424a3f049442fc3a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:51 +0100 Subject: zram: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 47 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 84982221fc66..8ee0f7bef190 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2177,6 +2177,28 @@ ATTRIBUTE_GROUPS(zram_disk); */ static int zram_add(void) { + struct queue_limits lim = { + .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, + /* + * To ensure that we always get PAGE_SIZE aligned and + * n*PAGE_SIZED sized I/O requests. + */ + .physical_block_size = PAGE_SIZE, + .io_min = PAGE_SIZE, + .io_opt = PAGE_SIZE, + .max_hw_discard_sectors = UINT_MAX, + /* + * zram_bio_discard() will clear all logical blocks if logical + * block size is identical with physical block size(PAGE_SIZE). + * But if it is different, we will skip discarding some parts of + * logical blocks in the part of the request range which isn't + * aligned to physical block size. So we can't ensure that all + * discarded logical blocks are zeroed. 
+ */ +#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE + .max_write_zeroes_sectors = UINT_MAX, +#endif + }; struct zram *zram; int ret, device_id; @@ -2195,7 +2217,7 @@ static int zram_add(void) #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); @@ -2216,29 +2238,6 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - - /* - * zram_bio_discard() will clear all logical blocks if logical block - * size is identical with physical block size(PAGE_SIZE). But if it is - * different, we will skip discarding some parts of logical blocks in - * the part of the request range which isn't aligned to physical block - * size. So we can't ensure that all discarded logical blocks are - * zeroed. - */ - if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) -- cgit v1.2.3 From b3f0846e720ee59291e3c5235f8a46e70dbc652c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:52 +0100 Subject: bcache: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 9955ecff3839..d06a9649d302 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -900,6 +900,16 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, SIZE_MAX / sizeof(atomic_t)); + struct queue_limits lim = { + .max_hw_sectors = UINT_MAX, + .max_sectors = UINT_MAX, + .max_segment_size = UINT_MAX, + .max_segments = BIO_MAX_VECS, + .max_hw_discard_sectors = UINT_MAX, + .io_min = block_size, + .logical_block_size = block_size, + .physical_block_size = block_size, + }; uint64_t n; int idx; @@ -935,7 +945,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) goto out_ida_remove; - d->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (lim.logical_block_size > PAGE_SIZE && cached_bdev) { + /* + * This should only happen with BCACHE_SB_VERSION_BDEV. + * Block/page size is checked for BCACHE_SB_VERSION_CDEV. 
+ */ + pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", + idx, lim.logical_block_size, + PAGE_SIZE, bdev_logical_block_size(cached_bdev)); + + /* This also adjusts physical block size/min io size if needed */ + lim.logical_block_size = bdev_logical_block_size(cached_bdev); + } + + d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(d->disk)) goto out_bioset_exit; @@ -949,27 +972,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->private_data = d; q = d->disk->queue; - q->limits.max_hw_sectors = UINT_MAX; - q->limits.max_sectors = UINT_MAX; - q->limits.max_segment_size = UINT_MAX; - q->limits.max_segments = BIO_MAX_VECS; - blk_queue_max_discard_sectors(q, UINT_MAX); - q->limits.io_min = block_size; - q->limits.logical_block_size = block_size; - q->limits.physical_block_size = block_size; - - if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { - /* - * This should only happen with BCACHE_SB_VERSION_BDEV. - * Block/page size is checked for BCACHE_SB_VERSION_CDEV. - */ - pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", - d->disk->disk_name, q->limits.logical_block_size, - PAGE_SIZE, bdev_logical_block_size(cached_bdev)); - - /* This also adjusts physical block size/min io size if needed */ - blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev)); - } blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); -- cgit v1.2.3 From 77c059222c31b0480c61964f361b28a4ce111e52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:53 +0100 Subject: btt: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Jiang Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvdimm/btt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9a0eae01d598..4d0c527e8576 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1496,9 +1496,13 @@ static int btt_blk_init(struct btt *btt) { struct nd_btt *nd_btt = btt->nd_btt; struct nd_namespace_common *ndns = nd_btt->ndns; + struct queue_limits lim = { + .logical_block_size = btt->sector_size, + .max_hw_sectors = UINT_MAX, + }; int rc; - btt->btt_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(btt->btt_disk)) return PTR_ERR(btt->btt_disk); @@ -1507,8 +1511,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; - blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size); - blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); -- cgit v1.2.3 From c3d9c3031e18f145d8a12026d4d704125fe901ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:54 +0100 Subject: pmem: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dave Jiang Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvdimm/pmem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 3a5df8d467c5..8dcc10b6db5b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -451,6 +451,11 @@ static int pmem_attach_disk(struct device *dev, { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); + struct queue_limits lim = { + .logical_block_size = pmem_sector_size(ndns), + .physical_block_size = PAGE_SIZE, + .max_hw_sectors = UINT_MAX, + }; int nid = dev_to_node(dev), fua; struct resource *res = &nsio->res; struct range bb_range; @@ -497,7 +502,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - disk = blk_alloc_disk(NULL, nid); + disk = blk_alloc_disk(&lim, nid); if (IS_ERR(disk)) return PTR_ERR(disk); q = disk->queue; @@ -539,9 +544,6 @@ static int pmem_attach_disk(struct device *dev, pmem->virt_addr = addr; blk_queue_write_cache(q, true, fua); - blk_queue_physical_block_size(q, PAGE_SIZE); - blk_queue_logical_block_size(q, pmem_sector_size(ndns)); - blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q); if (pmem->pfn_flags & PFN_MAP) -- cgit v1.2.3 From af190c53c995bf7c742c3387f6537534f8b92322 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:55 +0100 Subject: dcssblk: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/s390/block/dcssblk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 0903b432ea97..9c8f529b827c 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -546,6 +546,9 @@ static const struct attribute_group *dcssblk_dev_attr_groups[] = { static ssize_t dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct queue_limits lim = { + .logical_block_size = 4096, + }; int rc, i, j, num_of_segments; struct dcssblk_dev_info *dev_info; struct segment_info *seg_info, *temp; @@ -629,7 +632,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->dev.release = dcssblk_release_segment; dev_info->dev.groups = dcssblk_dev_attr_groups; INIT_LIST_HEAD(&dev_info->lh); - dev_info->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + dev_info->gd = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(dev_info->gd)) { rc = PTR_ERR(dev_info->gd); goto seg_list_del; @@ -639,7 +642,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->fops = &dcssblk_devops; dev_info->gd->private_data = dev_info; dev_info->gd->flags |= GENHD_FL_NO_PART; - blk_queue_logical_block_size(dev_info->gd->queue, 4096); blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue); seg_byte_size = (dev_info->end - dev_info->start + 1); -- cgit v1.2.3 From 9999200f583107f7e244e50935d480433b7d8a3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:45 +0100 Subject: aoe: pass queue_limits to blk_mq_alloc_disk Pass the few limits aoe imposes directly to blk_mq_alloc_disk instead of setting them one at a time and improve the way the default 
max_hw_sectors is initialized while we're at it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 2ff6e2da8cc4..b6dac8cee70f 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -24,8 +24,8 @@ static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; static struct dentry *aoe_debugfs_dir; -/* GPFS needs a larger value than the default. */ -static int aoe_maxsectors; +/* random default picked from the historic block max_sectors cap */ +static int aoe_maxsectors = 2560; module_param(aoe_maxsectors, int, 0644); MODULE_PARM_DESC(aoe_maxsectors, "When nonzero, set the maximum number of sectors per I/O request"); @@ -334,6 +334,10 @@ aoeblk_gdalloc(void *vp) mempool_t *mp; struct blk_mq_tag_set *set; sector_t ssize; + struct queue_limits lim = { + .max_hw_sectors = aoe_maxsectors, + .io_opt = SZ_2M, + }; ulong flags; int late = 0; int err; @@ -371,7 +375,7 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - gd = blk_mq_alloc_disk(set, NULL, d); + gd = blk_mq_alloc_disk(set, &lim, d); if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); @@ -384,14 +388,9 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_TKILL); WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(gd->queue, 2560u); - blk_queue_io_opt(gd->queue, SZ_2M); d->bufpool = mp; d->blkq = gd->queue; d->gd = gd; - if (aoe_maxsectors) - blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->minors = AOE_PARTITIONS; -- cgit v1.2.3 From 48bc8c7ba6fb39a4325b07f3abe8fe5a77361c7e Mon Sep 17 00:00:00 2001 From: Christoph 
Hellwig Date: Thu, 15 Feb 2024 08:02:46 +0100 Subject: floppy: pass queue_limits to blk_mq_alloc_disk Pass the few limits floppy imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Denis Efremov Link: https://lore.kernel.org/r/20240215070300.2200308-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 582cf50c6bf6..1b399ec8c07d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4516,13 +4516,15 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { + struct queue_limits lim = { + .max_hw_sectors = 64, + }; struct gendisk *disk; - disk = blk_mq_alloc_disk(&tag_sets[drive], NULL, NULL); + disk = blk_mq_alloc_disk(&tag_sets[drive], &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); - blk_queue_max_hw_sectors(disk->queue, 64); disk->major = FLOPPY_MAJOR; disk->first_minor = TOMINOR(drive) | (type << 2); disk->minors = 1; -- cgit v1.2.3 From 68c3135fb5fbd85c7b2ca851184f30f54433a9d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:47 +0100 Subject: mtip: pass queue_limits to blk_mq_alloc_disk Pass the few limits mtip imposes directly to blk_mq_alloc_disk instead of setting them one at a time and drop the pointless setting of a io_min that is equal to the physical block size. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ac08dea73552..43a187609ef7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3401,6 +3401,12 @@ static const struct blk_mq_ops mtip_mq_ops = { */ static int mtip_block_initialize(struct driver_data *dd) { + struct queue_limits lim = { + .physical_block_size = 4096, + .max_hw_sectors = 0xffff, + .max_segments = MTIP_MAX_SG, + .max_segment_size = 0x400000, + }; int rv = 0, wait_for_rebuild = 0; sector_t capacity; unsigned int index = 0; @@ -3431,7 +3437,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - dd->disk = blk_mq_alloc_disk(&dd->tags, NULL, dd); + dd->disk = blk_mq_alloc_disk(&dd->tags, &lim, dd); if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3481,12 +3487,7 @@ skip_create_disk: /* Set device limits. */ blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); - blk_queue_max_segments(dd->queue, MTIP_MAX_SG); - blk_queue_physical_block_size(dd->queue, 4096); - blk_queue_max_hw_sectors(dd->queue, 0xffff); - blk_queue_max_segment_size(dd->queue, 0x400000); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); - blk_queue_io_min(dd->queue, 4096); /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { -- cgit v1.2.3 From 9a0d4970288de29191fa45bf0ab4d8398bfa3a01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:48 +0100 Subject: nbd: pass queue_limits to blk_mq_alloc_disk Pass the few limits nbd imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d2b422d842b7..9ee9587375fa 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1783,6 +1783,12 @@ static const struct blk_mq_ops nbd_mq_ops = { static struct nbd_device *nbd_dev_add(int index, unsigned int refs) { + struct queue_limits lim = { + .max_hw_sectors = 65536, + .max_user_sectors = 256, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; struct nbd_device *nbd; struct gendisk *disk; int err = -ENOMEM; @@ -1823,7 +1829,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) if (err < 0) goto out_free_tags; - disk = blk_mq_alloc_disk(&nbd->tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_idr; @@ -1843,11 +1849,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) * Tell the block layer that we are not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_max_discard_sectors(disk->queue, 0); - blk_queue_max_segment_size(disk->queue, UINT_MAX); - blk_queue_max_segments(disk->queue, USHRT_MAX); - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); -- cgit v1.2.3 From a7f18b74dbe171625afc2751942a92f71a4dd4ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:49 +0100 Subject: ps3disk: pass queue_limits to blk_mq_alloc_disk Pass the few limits ps3disk imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index dfd3860df4f8..b810ac0a5c4b 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -382,6 +382,14 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv; int error; unsigned int devidx; + struct queue_limits lim = { + .logical_block_size = dev->blk_size, + .max_hw_sectors = dev->bounce_size >> 9, + .max_segments = -1, + .max_segment_size = dev->bounce_size, + .dma_alignment = dev->blk_size - 1, + }; + struct request_queue *queue; struct gendisk *gendisk; @@ -431,7 +439,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) if (error) goto fail_teardown; - gendisk = blk_mq_alloc_disk(&priv->tag_set, NULL, dev); + gendisk = blk_mq_alloc_disk(&priv->tag_set, &lim, dev); if (IS_ERR(gendisk)) { dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", __func__, __LINE__); @@ -441,15 +449,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) queue = gendisk->queue; - blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); - blk_queue_dma_alignment(queue, dev->blk_size-1); - blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_write_cache(queue, true, false); - blk_queue_max_segments(queue, -1); - blk_queue_max_segment_size(queue, dev->bounce_size); - priv->gendisk = gendisk; gendisk->major = ps3disk_major; gendisk->first_minor = devidx * PS3DISK_MINORS; -- cgit v1.2.3 From 24f30b770c0f450346f1c99120427b2e938cdfd0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:50 +0100 Subject: rbd: pass queue_limits to blk_mq_alloc_disk Pass the limits rbd imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6b4f1898a722..26ff5cd2bf0a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4952,6 +4952,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) struct request_queue *q; unsigned int objset_bytes = rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; + struct queue_limits lim = { + .max_hw_sectors = objset_bytes >> SECTOR_SHIFT, + .max_user_sectors = objset_bytes >> SECTOR_SHIFT, + .io_min = rbd_dev->opts->alloc_size, + .io_opt = rbd_dev->opts->alloc_size, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; int err; memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); @@ -4966,7 +4974,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (err) return err; - disk = blk_mq_alloc_disk(&rbd_dev->tag_set, NULL, rbd_dev); + if (rbd_dev->opts->trim) { + lim.discard_granularity = rbd_dev->opts->alloc_size; + lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT; + lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT; + } + + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; @@ -4987,19 +5001,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); - q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, USHRT_MAX); - blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, rbd_dev->opts->alloc_size); - blk_queue_io_opt(q, rbd_dev->opts->alloc_size); - - if (rbd_dev->opts->trim) { - q->limits.discard_granularity = rbd_dev->opts->alloc_size; - 
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); - blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); - } - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); -- cgit v1.2.3 From e6ed9892f10d7195d621ede1cedc41421f1ca607 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:51 +0100 Subject: rnbd-clt: pass queue_limits to blk_mq_alloc_disk Pass the limits rnbd-clt imposes directly to blk_mq_alloc_disk instead of setting them one at a time. While at it don't set an explicit number of discard segments, as 1 is the default (which most drivers rely on). Signed-off-by: Christoph Hellwig Acked-by: Jack Wang Link: https://lore.kernel.org/r/20240215070300.2200308-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 64 +++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index d51be4f2df61..b7ffe03c6160 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1329,43 +1329,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev) } } -static void setup_request_queue(struct rnbd_clt_dev *dev, - struct rnbd_msg_open_rsp *rsp) -{ - blk_queue_logical_block_size(dev->queue, - le16_to_cpu(rsp->logical_block_size)); - blk_queue_physical_block_size(dev->queue, - le16_to_cpu(rsp->physical_block_size)); - blk_queue_max_hw_sectors(dev->queue, - dev->sess->max_io_size / SECTOR_SIZE); - - /* - * we don't support discards to "discontiguous" segments - * in on request - */ - blk_queue_max_discard_segments(dev->queue, 1); - - blk_queue_max_discard_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - dev->queue->limits.discard_granularity = - le32_to_cpu(rsp->discard_granularity); - dev->queue->limits.discard_alignment = - le32_to_cpu(rsp->discard_alignment); - if 
(le16_to_cpu(rsp->secure_discard)) - blk_queue_max_secure_erase_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); - blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); - blk_queue_max_segments(dev->queue, dev->sess->max_segments); - blk_queue_io_opt(dev->queue, dev->sess->max_io_size); - blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, - !!(rsp->cache_policy & RNBD_WRITEBACK), - !!(rsp->cache_policy & RNBD_FUA)); - blk_queue_max_write_zeroes_sectors(dev->queue, - le32_to_cpu(rsp->max_write_zeroes_sectors)); -} - static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp, int idx) { @@ -1403,18 +1366,41 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp) { + struct queue_limits lim = { + .logical_block_size = le16_to_cpu(rsp->logical_block_size), + .physical_block_size = le16_to_cpu(rsp->physical_block_size), + .io_opt = dev->sess->max_io_size, + .max_hw_sectors = dev->sess->max_io_size / SECTOR_SIZE, + .max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors), + .discard_granularity = le32_to_cpu(rsp->discard_granularity), + .discard_alignment = le32_to_cpu(rsp->discard_alignment), + .max_segments = dev->sess->max_segments, + .virt_boundary_mask = SZ_4K - 1, + .max_write_zeroes_sectors = + le32_to_cpu(rsp->max_write_zeroes_sectors), + }; int idx = dev->clt_device_id; dev->size = le64_to_cpu(rsp->nsectors) * le16_to_cpu(rsp->logical_block_size); - dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, NULL, dev); + if (rsp->secure_discard) { + lim.max_secure_erase_sectors = + le32_to_cpu(rsp->max_discard_sectors); + } + + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); dev->queue = dev->gd->queue; rnbd_init_mq_hw_queues(dev); - setup_request_queue(dev, rsp); + 
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); + blk_queue_write_cache(dev->queue, + !!(rsp->cache_policy & RNBD_WRITEBACK), + !!(rsp->cache_policy & RNBD_FUA)); + return rnbd_clt_setup_gen_disk(dev, rsp, idx); } -- cgit v1.2.3 From d0fa9a8b0af71b69cf3dec10feaebe19d55a72cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:52 +0100 Subject: sunvdc: pass queue_limits to blk_mq_alloc_disk Pass the few limits sunvdc imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/sunvdc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index a1f74dd1eae5..c99dd6698977 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -784,6 +784,14 @@ static const struct blk_mq_ops vdc_mq_ops = { static int probe_disk(struct vdc_port *port) { + struct queue_limits lim = { + .physical_block_size = port->vdisk_phys_blksz, + .max_hw_sectors = port->max_xfer_size, + /* Each segment in a request is up to an aligned page in size. */ + .seg_boundary_mask = PAGE_SIZE - 1, + .max_segment_size = PAGE_SIZE, + .max_segments = port->ring_cookies, + }; struct request_queue *q; struct gendisk *g; int err; @@ -824,7 +832,7 @@ static int probe_disk(struct vdc_port *port) if (err) return err; - g = blk_mq_alloc_disk(&port->tag_set, NULL, port); + g = blk_mq_alloc_disk(&port->tag_set, &lim, port); if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); @@ -835,12 +843,6 @@ static int probe_disk(struct vdc_port *port) port->disk = g; q = g->queue; - /* Each segment in a request is up to an aligned page in size. 
*/ - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_max_segment_size(q, PAGE_SIZE); - - blk_queue_max_segments(q, port->ring_cookies); - blk_queue_max_hw_sectors(q, port->max_xfer_size); g->major = vdc_major; g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT; g->minors = 1 << PARTITION_SHIFT; @@ -872,8 +874,6 @@ static int probe_disk(struct vdc_port *port) } } - blk_queue_physical_block_size(q, port->vdisk_phys_blksz); - pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", g->disk_name, port->vdisk_size, (port->vdisk_size >> (20 - 9)), -- cgit v1.2.3 From a339cf2bbfbe6e16ead79276d608912d36065884 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:53 +0100 Subject: gdrom: pass queue_limits to blk_mq_alloc_disk Pass the few limits gdrom imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-11-hch@lst.de Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1d044779f5e4..9398beeb5d1e 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -724,11 +724,6 @@ static void probe_gdrom_setupdisk(void) static int probe_gdrom_setupqueue(void) { - blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR); - /* using DMA so memory will need to be contiguous */ - blk_queue_max_segments(gd.gdrom_rq, 1); - /* set a large max size to get most from DMA */ - blk_queue_max_segment_size(gd.gdrom_rq, 0x40000); gd.disk->queue = gd.gdrom_rq; return gdrom_init_dma_mode(); } @@ -743,6 +738,13 @@ static const struct blk_mq_ops gdrom_mq_ops = { */ static int probe_gdrom(struct platform_device *devptr) { + struct queue_limits lim = { + .logical_block_size = GDROM_HARD_SECTOR, + /* using DMA so memory will need to be contiguous */ + .max_segments = 1, + /* set a large max 
size to get most from DMA */ + .max_segment_size = 0x40000, + }; int err; /* @@ -778,7 +780,7 @@ static int probe_gdrom(struct platform_device *devptr) if (err) goto probe_fail_free_cd_info; - gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL, NULL); + gd.disk = blk_mq_alloc_disk(&gd.tag_set, &lim, NULL); if (IS_ERR(gd.disk)) { err = PTR_ERR(gd.disk); goto probe_fail_free_tag_set; -- cgit v1.2.3 From f93b43ae3feafedc5777099ca1a0e05352b92671 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:54 +0100 Subject: ms_block: pass queue_limits to blk_mq_alloc_disk Pass the few limits ms_block imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/memstick/core/ms_block.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index d3277c901d16..47a314a4eb6f 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2078,6 +2078,12 @@ static const struct blk_mq_ops msb_mq_ops = { static int msb_init_disk(struct memstick_dev *card) { struct msb_data *msb = memstick_get_drvdata(card); + struct queue_limits lim = { + .logical_block_size = msb->page_size, + .max_hw_sectors = MS_BLOCK_MAX_PAGES, + .max_segments = MS_BLOCK_MAX_SEGS, + .max_segment_size = MS_BLOCK_MAX_PAGES * msb->page_size, + }; int rc; unsigned long capacity; @@ -2093,19 +2099,13 @@ static int msb_init_disk(struct memstick_dev *card) if (rc) goto out_release_id; - msb->disk = blk_mq_alloc_disk(&msb->tag_set, NULL, card); + msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card); if (IS_ERR(msb->disk)) { rc = PTR_ERR(msb->disk); goto out_free_tag_set; } msb->queue = msb->disk->queue; - blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); - blk_queue_max_segments(msb->queue, 
MS_BLOCK_MAX_SEGS); - blk_queue_max_segment_size(msb->queue, - MS_BLOCK_MAX_PAGES * msb->page_size); - blk_queue_logical_block_size(msb->queue, msb->page_size); - sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id); msb->disk->fops = &msb_bdops; msb->disk->private_data = msb; -- cgit v1.2.3 From 9f633ecd43046659e3345bc4a4404e1d2ba67463 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:55 +0100 Subject: mspro_block: pass queue_limits to blk_mq_alloc_disk Pass the few limits mspro_block imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-13-hch@lst.de Signed-off-by: Jens Axboe --- drivers/memstick/core/mspro_block.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index db0e2a42ca3c..49accfdc89d6 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1103,6 +1103,12 @@ static const struct blk_mq_ops mspro_mq_ops = { static int mspro_block_init_disk(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); + struct queue_limits lim = { + .logical_block_size = msb->page_size, + .max_hw_sectors = MSPRO_BLOCK_MAX_PAGES, + .max_segments = MSPRO_BLOCK_MAX_SEGS, + .max_segment_size = MSPRO_BLOCK_MAX_PAGES * msb->page_size, + }; struct mspro_devinfo *dev_info = NULL; struct mspro_sys_info *sys_info = NULL; struct mspro_sys_attr *s_attr = NULL; @@ -1138,18 +1144,13 @@ static int mspro_block_init_disk(struct memstick_dev *card) if (rc) goto out_release_id; - msb->disk = blk_mq_alloc_disk(&msb->tag_set, NULL, card); + msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card); if (IS_ERR(msb->disk)) { rc = PTR_ERR(msb->disk); goto out_free_tag_set; } msb->queue = msb->disk->queue; - blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES); 
- blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS); - blk_queue_max_segment_size(msb->queue, - MSPRO_BLOCK_MAX_PAGES * msb->page_size); - msb->disk->major = major; msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT; msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT; @@ -1158,8 +1159,6 @@ static int mspro_block_init_disk(struct memstick_dev *card) sprintf(msb->disk->disk_name, "mspblk%d", disk_id); - blk_queue_logical_block_size(msb->queue, msb->page_size); - capacity = be16_to_cpu(sys_info->user_block_count); capacity *= be16_to_cpu(sys_info->block_size); capacity *= msb->page_size >> 9; -- cgit v1.2.3 From 3ec44e52bfce60f6da65165bc86eb382462d173d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:56 +0100 Subject: mtd_blkdevs: pass queue_limits to blk_mq_alloc_disk Pass the few limits mtd_blkdevs imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-14-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index b8878a2457af..3caa0717d46c 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -277,6 +277,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) { struct mtd_blktrans_ops *tr = new->tr; struct mtd_blktrans_dev *d; + struct queue_limits lim = { }; int last_devnum = -1; struct gendisk *gd; int ret; @@ -331,9 +332,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); if (ret) goto out_kfree_tag_set; + + lim.logical_block_size = tr->blksize; + if (tr->discard) + lim.max_hw_discard_sectors = UINT_MAX; /* Create gendisk */ - gd = blk_mq_alloc_disk(new->tag_set, NULL, new); + gd = blk_mq_alloc_disk(new->tag_set, &lim, new); if (IS_ERR(gd)) { ret = PTR_ERR(gd); goto 
out_free_tag_set; @@ -371,14 +376,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (tr->flush) blk_queue_write_cache(new->rq, true, false); - blk_queue_logical_block_size(new->rq, tr->blksize); - blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); - if (tr->discard) - blk_queue_max_discard_sectors(new->rq, UINT_MAX); - gd->queue = new->rq; if (new->readonly) -- cgit v1.2.3 From 21b700c0812b6aa8f794c36b971772b2b08dab9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:57 +0100 Subject: ubiblock: pass queue_limits to blk_mq_alloc_disk Pass the few limits ubiblock imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240215070300.2200308-15-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mtd/ubi/block.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 9be87c231a2e..5c8fdcc088a0 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -348,6 +348,9 @@ static int calc_disk_capacity(struct ubi_volume_info *vi, u64 *disk_capacity) int ubiblock_create(struct ubi_volume_info *vi) { + struct queue_limits lim = { + .max_segments = UBI_MAX_SG_COUNT, + }; struct ubiblock *dev; struct gendisk *gd; u64 disk_capacity; @@ -393,7 +396,7 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Initialize the gendisk of this ubiblock device */ - gd = blk_mq_alloc_disk(&dev->tag_set, NULL, dev); + gd = blk_mq_alloc_disk(&dev->tag_set, &lim, dev); if (IS_ERR(gd)) { ret = PTR_ERR(gd); goto out_free_tags; @@ -416,7 +419,6 @@ int ubiblock_create(struct ubi_volume_info *vi) dev->gd = gd; dev->rq = gd->queue; - blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT); list_add_tail(&dev->list, &ubiblock_devices); -- cgit v1.2.3 From 066be10aef5a7ddd8ad537db7a5145c6d79d4ea2 Mon Sep 17 
00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:58 +0100 Subject: scm_blk: pass queue_limits to blk_mq_alloc_disk Pass the few limits scm_block imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-16-hch@lst.de Signed-off-by: Jens Axboe --- drivers/s390/block/scm_blk.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index d05b2e2799a4..9f6fdd0daa74 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -435,10 +435,17 @@ static const struct blk_mq_ops scm_mq_ops = { int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) { - unsigned int devindex, nr_max_blk; + struct queue_limits lim = { + .logical_block_size = 1 << 12, + }; + unsigned int devindex; struct request_queue *rq; int len, ret; + lim.max_segments = min(scmdev->nr_max_block, + (unsigned int) (PAGE_SIZE / sizeof(struct aidaw))); + lim.max_hw_sectors = lim.max_segments << 3; /* 8 * 512 = blk_size */ + devindex = atomic_inc_return(&nr_devices) - 1; /* scma..scmz + scmaa..scmzz */ if (devindex > 701) { @@ -462,18 +469,12 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) if (ret) goto out; - bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, NULL, scmdev); + bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, &lim, scmdev); if (IS_ERR(bdev->gendisk)) { ret = PTR_ERR(bdev->gendisk); goto out_tag; } rq = bdev->rq = bdev->gendisk->queue; - nr_max_blk = min(scmdev->nr_max_block, - (unsigned int) (PAGE_SIZE / sizeof(struct aidaw))); - - blk_queue_logical_block_size(rq, 1 << 12); - blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */ - blk_queue_max_segments(rq, nr_max_blk); blk_queue_flag_set(QUEUE_FLAG_NONROT, rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); -- cgit v1.2.3 
From 494ea040bcb5f4cc78c37dc53c7915752c24f739 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:59 +0100 Subject: ublk: pass queue_limits to blk_mq_alloc_disk Pass the limits ublk imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-17-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 90 ++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c5b655270798..01afe90a47ac 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -246,21 +246,12 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return 0; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - const struct ublk_param_zoned *p = &ub->params.zoned; - - disk_set_zoned(ub->ub_disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); - disk_set_max_active_zones(ub->ub_disk, p->max_active_zones); - disk_set_max_open_zones(ub->ub_disk, p->max_open_zones); - blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors); - ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); - - return 0; } /* Based on virtblk_alloc_report_buffer */ @@ -432,9 +423,8 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return -EOPNOTSUPP; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - return -EOPNOTSUPP; } static int ublk_revalidate_disk_zones(struct ublk_device *ub) @@ -498,11 +488,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) struct request_queue *q = ub->ub_disk->queue; const struct 
ublk_param_basic *p = &ub->params.basic; - blk_queue_logical_block_size(q, 1 << p->logical_bs_shift); - blk_queue_physical_block_size(q, 1 << p->physical_bs_shift); - blk_queue_io_min(q, 1 << p->io_min_shift); - blk_queue_io_opt(q, 1 << p->io_opt_shift); - blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE, p->attrs & UBLK_ATTR_FUA); if (p->attrs & UBLK_ATTR_ROTATIONAL) @@ -510,29 +495,12 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - blk_queue_max_hw_sectors(q, p->max_sectors); - blk_queue_chunk_sectors(q, p->chunk_sectors); - blk_queue_virt_boundary(q, p->virt_boundary_mask); - if (p->attrs & UBLK_ATTR_READ_ONLY) set_disk_ro(ub->ub_disk, true); set_capacity(ub->ub_disk, p->dev_sectors); } -static void ublk_dev_param_discard_apply(struct ublk_device *ub) -{ - struct request_queue *q = ub->ub_disk->queue; - const struct ublk_param_discard *p = &ub->params.discard; - - q->limits.discard_alignment = p->discard_alignment; - q->limits.discard_granularity = p->discard_granularity; - blk_queue_max_discard_sectors(q, p->max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, - p->max_write_zeroes_sectors); - blk_queue_max_discard_segments(q, p->max_discard_segments); -} - static int ublk_validate_params(const struct ublk_device *ub) { /* basic param is the only one which must be set */ @@ -576,20 +544,12 @@ static int ublk_validate_params(const struct ublk_device *ub) return 0; } -static int ublk_apply_params(struct ublk_device *ub) +static void ublk_apply_params(struct ublk_device *ub) { - if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) - return -EINVAL; - ublk_dev_param_basic_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) - ublk_dev_param_discard_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_ZONED) - return ublk_dev_param_zoned_apply(ub); - - return 0; + ublk_dev_param_zoned_apply(ub); } static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) @@ 
-2205,12 +2165,47 @@ static struct ublk_device *ublk_get_device_from_id(int idx) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); + const struct ublk_param_basic *p = &ub->params.basic; int ublksrv_pid = (int)header->data[0]; + struct queue_limits lim = { + .logical_block_size = 1 << p->logical_bs_shift, + .physical_block_size = 1 << p->physical_bs_shift, + .io_min = 1 << p->io_min_shift, + .io_opt = 1 << p->io_opt_shift, + .max_hw_sectors = p->max_sectors, + .chunk_sectors = p->chunk_sectors, + .virt_boundary_mask = p->virt_boundary_mask, + + }; struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) return -EINVAL; + if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) + return -EINVAL; + + if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) { + const struct ublk_param_discard *pd = &ub->params.discard; + + lim.discard_alignment = pd->discard_alignment; + lim.discard_granularity = pd->discard_granularity; + lim.max_hw_discard_sectors = pd->max_discard_sectors; + lim.max_write_zeroes_sectors = pd->max_write_zeroes_sectors; + lim.max_discard_segments = pd->max_discard_segments; + } + + if (ub->params.types & UBLK_PARAM_TYPE_ZONED) { + const struct ublk_param_zoned *p = &ub->params.zoned; + + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return -EOPNOTSUPP; + + lim.zoned = true; + lim.max_active_zones = p->max_active_zones; + lim.max_open_zones = p->max_open_zones; + lim.max_zone_append_sectors = p->max_zone_append_sectors; + } if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; @@ -2222,7 +2217,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&ub->tag_set, &lim, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; @@ -2234,9 +2229,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct 
io_uring_cmd *cmd) ub->dev_info.ublksrv_pid = ublksrv_pid; ub->ub_disk = disk; - ret = ublk_apply_params(ub); - if (ret) - goto out_put_disk; + ublk_apply_params(ub); /* don't probe partitions if any one ubq daemon is un-trusted */ if (ub->nr_privileged_daemon != ub->nr_queues_ready) @@ -2262,7 +2255,6 @@ out_put_cdev: ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); } -out_put_disk: if (ret) put_disk(disk); out_unlock: -- cgit v1.2.3 From 616f8766179277324393f7b77e07f14cb3503825 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:03:00 +0100 Subject: mmc: pass queue_limits to blk_mq_alloc_disk Pass the queue limit set at initialization time directly to blk_mq_alloc_disk instead of updating it right after the allocation. This requires refactoring the code a bit so that what was mmc_setup_queue before also allocates the gendisk now and actually sets all limits. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Link: https://lore.kernel.org/r/20240215070300.2200308-18-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mmc/core/queue.c | 97 ++++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 67ad186d132a..2ae60d208cdf 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -174,8 +174,8 @@ static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp) return sg; } -static void mmc_queue_setup_discard(struct request_queue *q, - struct mmc_card *card) +static void mmc_queue_setup_discard(struct mmc_card *card, + struct queue_limits *lim) { unsigned max_discard; @@ -183,15 +183,17 @@ static void mmc_queue_setup_discard(struct request_queue *q, if (!max_discard) return; - blk_queue_max_discard_sectors(q, max_discard); - q->limits.discard_granularity = card->pref_erase << 9; - /* granularity must not be greater than max. 
discard */ - if (card->pref_erase > max_discard) - q->limits.discard_granularity = SECTOR_SIZE; + lim->max_hw_discard_sectors = max_discard; if (mmc_can_secure_erase_trim(card)) - blk_queue_max_secure_erase_sectors(q, max_discard); + lim->max_secure_erase_sectors = max_discard; if (mmc_can_trim(card) && card->erased_byte == 0) - blk_queue_max_write_zeroes_sectors(q, max_discard); + lim->max_write_zeroes_sectors = max_discard; + + /* granularity must not be greater than max. discard */ + if (card->pref_erase > max_discard) + lim->discard_granularity = SECTOR_SIZE; + else + lim->discard_granularity = card->pref_erase << 9; } static unsigned short mmc_get_max_segments(struct mmc_host *host) @@ -341,40 +343,53 @@ static const struct blk_mq_ops mmc_mq_ops = { .timeout = mmc_mq_timed_out, }; -static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) +static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, + struct mmc_card *card) { struct mmc_host *host = card->host; - unsigned block_size = 512; + struct queue_limits lim = { }; + struct gendisk *disk; - blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); if (mmc_can_erase(card)) - mmc_queue_setup_discard(mq->queue, card); + mmc_queue_setup_discard(card, &lim); if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask) - blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); - blk_queue_max_hw_sectors(mq->queue, - min(host->max_blk_count, host->max_req_size / 512)); - if (host->can_dma_map_merge) - WARN(!blk_queue_can_use_dma_map_merging(mq->queue, - mmc_dev(host)), - "merging was advertised but not possible"); - blk_queue_max_segments(mq->queue, mmc_get_max_segments(host)); - - if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) { - block_size = card->ext_csd.data_sector_size; - WARN_ON(block_size != 512 && block_size != 4096); - } + lim.bounce = BLK_BOUNCE_HIGH; + + lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512); + + 
if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) + lim.logical_block_size = card->ext_csd.data_sector_size; + else + lim.logical_block_size = 512; + + WARN_ON_ONCE(lim.logical_block_size != 512 && + lim.logical_block_size != 4096); - blk_queue_logical_block_size(mq->queue, block_size); /* - * After blk_queue_can_use_dma_map_merging() was called with succeed, - * since it calls blk_queue_virt_boundary(), the mmc should not call - * both blk_queue_max_segment_size(). + * Setting a virt_boundary implicitly sets a max_segment_size, so try + * to set the hardware one here. */ - if (!host->can_dma_map_merge) - blk_queue_max_segment_size(mq->queue, - round_down(host->max_seg_size, block_size)); + if (host->can_dma_map_merge) { + lim.virt_boundary_mask = dma_get_merge_boundary(mmc_dev(host)); + lim.max_segments = MMC_DMA_MAP_MERGE_SEGMENTS; + } else { + lim.max_segment_size = + round_down(host->max_seg_size, lim.logical_block_size); + lim.max_segments = host->max_segs; + } + + disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq); + if (IS_ERR(disk)) + return disk; + mq->queue = disk->queue; + + if (mmc_host_is_spi(host) && host->use_spi_crc) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); + blk_queue_rq_timeout(mq->queue, 60 * HZ); + + blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); @@ -386,6 +401,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) init_waitqueue_head(&mq->wait); mmc_crypto_setup_queue(mq->queue, host); + return disk; } static inline bool mmc_merge_capable(struct mmc_host *host) @@ -447,18 +463,9 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) return ERR_PTR(ret); - disk = blk_mq_alloc_disk(&mq->tag_set, NULL, mq); - if (IS_ERR(disk)) { + disk = mmc_alloc_disk(mq, card); + if (IS_ERR(disk)) blk_mq_free_tag_set(&mq->tag_set); - return disk; - } - mq->queue
= disk->queue; - - if (mmc_host_is_spi(host) && host->use_spi_crc) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); - blk_queue_rq_timeout(mq->queue, 60 * HZ); - - mmc_setup_queue(mq, card); return disk; } -- cgit v1.2.3 From 8b631f9cf0b84ac59cd4f0c6dcd2d0cb80dd8a49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:44 +0100 Subject: null_blk: remove the bio based I/O path The bio based I/O path complicates null_blk and also makes various data structures, including the per-command one, way bigger than required for the main request based interface. As the bio-based path is mostly used by stacking drivers and simple memory based drivers, and brd is a good example driver for the latter, there is no need to have a bio based path in null_blk. Remove the path to simplify the driver and make future block layer API changes simpler by not having to deal with the complex two API setup in null_blk. Note that the queue_mode field in struct nullb_device is kept as that is simpler than having two different places to check the value and fully open coding the debugfs helpers as the existing ones won't work without a named struct member.
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 365 +++++++------------------------------- drivers/block/null_blk/null_blk.h | 17 -- drivers/block/null_blk/trace.h | 5 +- drivers/block/null_blk/zoned.c | 10 +- 4 files changed, 69 insertions(+), 328 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index baf2b228d008..d6836327eefb 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=,,,"); #endif +/* + * Historic queue modes. + * + * These days nothing but NULL_Q_MQ is actually supported, but we keep it the + * enum for error reporting. 
+ */ +enum { + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + static int g_queue_mode = NULL_Q_MQ; static int null_param_store_val(const char *str, int *val, int min, int max) @@ -756,98 +768,11 @@ static void null_free_dev(struct nullb_device *dev) kfree(dev); } -static void put_tag(struct nullb_queue *nq, unsigned int tag) -{ - clear_bit_unlock(tag, nq->tag_map); - - if (waitqueue_active(&nq->wait)) - wake_up(&nq->wait); -} - -static unsigned int get_tag(struct nullb_queue *nq) -{ - unsigned int tag; - - do { - tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); - if (tag >= nq->queue_depth) - return -1U; - } while (test_and_set_bit_lock(tag, nq->tag_map)); - - return tag; -} - -static void free_cmd(struct nullb_cmd *cmd) -{ - put_tag(cmd->nq, cmd->tag); -} - -static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); - -static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - unsigned int tag; - - tag = get_tag(nq); - if (tag != -1U) { - cmd = &nq->cmds[tag]; - cmd->tag = tag; - cmd->error = BLK_STS_OK; - cmd->nq = nq; - if (nq->dev->irqmode == NULL_IRQ_TIMER) { - hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - cmd->timer.function = null_cmd_timer_expired; - } - return cmd; - } - - return NULL; -} - -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio) -{ - struct nullb_cmd *cmd; - DEFINE_WAIT(wait); - - do { - /* - * This avoids multiple return statements, multiple calls to - * __alloc_cmd() and a fast path call to prepare_to_wait(). 
- */ - cmd = __alloc_cmd(nq); - if (cmd) { - cmd->bio = bio; - return cmd; - } - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); - io_schedule(); - finish_wait(&nq->wait, &wait); - } while (1); -} - -static void end_cmd(struct nullb_cmd *cmd) -{ - int queue_mode = cmd->nq->dev->queue_mode; - - switch (queue_mode) { - case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, cmd->error); - return; - case NULL_Q_BIO: - cmd->bio->bi_status = cmd->error; - bio_endio(cmd->bio); - break; - } - - free_cmd(cmd); -} - static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) { - end_cmd(container_of(timer, struct nullb_cmd, timer)); + struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer); + blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error); return HRTIMER_NORESTART; } @@ -860,7 +785,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_complete_rq(struct request *rq) { - end_cmd(blk_mq_rq_to_pdu(rq)); + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + + blk_mq_end_request(rq, cmd->error); } static struct nullb_page *null_alloc_page(void) @@ -1277,7 +1204,7 @@ static int null_transfer(struct nullb *nullb, struct page *page, static int null_handle_rq(struct nullb_cmd *cmd) { - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; @@ -1302,63 +1229,21 @@ static int null_handle_rq(struct nullb_cmd *cmd) return 0; } -static int null_handle_bio(struct nullb_cmd *cmd) -{ - struct bio *bio = cmd->bio; - struct nullb *nullb = cmd->nq->dev->nullb; - int err; - unsigned int len; - sector_t sector = bio->bi_iter.bi_sector; - struct bio_vec bvec; - struct bvec_iter iter; - - spin_lock_irq(&nullb->lock); - bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; - err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector, - bio->bi_opf & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - 
return err; - } - sector += len >> SECTOR_SHIFT; - } - spin_unlock_irq(&nullb->lock); - return 0; -} - -static void null_stop_queue(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_stop_hw_queues(q); -} - -static void null_restart_queue_async(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_start_stopped_hw_queues(q, true); -} - static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; blk_status_t sts = BLK_STS_OK; - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); if (!hrtimer_active(&nullb->bw_timer)) hrtimer_restart(&nullb->bw_timer); if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) { - null_stop_queue(nullb); + blk_mq_stop_hw_queues(nullb->q); /* race with timer */ if (atomic_long_read(&nullb->cur_bytes) > 0) - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); /* requeue request */ sts = BLK_STS_DEV_RESOURCE; } @@ -1385,37 +1270,29 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; - int err; if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); + return errno_to_blk_status(null_handle_rq(cmd)); - if (dev->queue_mode == NULL_Q_BIO) - err = null_handle_bio(cmd); - else - err = null_handle_rq(cmd); - - return errno_to_blk_status(err); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb_device *dev = cmd->nq->dev; struct bio *bio; - if (dev->memory_backed) - return; - - if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) { - zero_fill_bio(cmd->bio); - } else if (req_op(cmd->rq) == REQ_OP_READ) { - __rq_for_each_bio(bio, cmd->rq) + if (!dev->memory_backed 
&& req_op(rq) == REQ_OP_READ) { + __rq_for_each_bio(bio, rq) zero_fill_bio(bio); } } static inline void nullb_complete_cmd(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); + /* * Since root privileges are required to configure the null_blk * driver, it is fine that this driver does not initialize the @@ -1429,20 +1306,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) /* Complete IO by inline, softirq or timer */ switch (cmd->nq->dev->irqmode) { case NULL_IRQ_SOFTIRQ: - switch (cmd->nq->dev->queue_mode) { - case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); - break; - case NULL_Q_BIO: - /* - * XXX: no proper submitting cpu information available. - */ - end_cmd(cmd); - break; - } + blk_mq_complete_request(rq); break; case NULL_IRQ_NONE: - end_cmd(cmd); + blk_mq_end_request(rq, cmd->error); break; case NULL_IRQ_TIMER: null_cmd_end_timer(cmd); @@ -1503,7 +1370,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); hrtimer_forward_now(&nullb->bw_timer, timer_interval); @@ -1520,26 +1387,6 @@ static void nullb_setup_bwtimer(struct nullb *nullb) hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); } -static struct nullb_queue *nullb_to_queue(struct nullb *nullb) -{ - int index = 0; - - if (nullb->nr_queues != 1) - index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); - - return &nullb->queues[index]; -} - -static void null_submit_bio(struct bio *bio) -{ - sector_t sector = bio->bi_iter.bi_sector; - sector_t nr_sectors = bio_sectors(bio); - struct nullb *nullb = bio->bi_bdev->bd_disk->private_data; - struct nullb_queue *nq = nullb_to_queue(nullb); - - null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); -} - #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static bool 
should_timeout_request(struct request *rq) @@ -1659,7 +1506,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) blk_rq_sectors(req)); if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, blk_mq_end_request_batch)) - end_cmd(cmd); + blk_mq_end_request(req, cmd->error); nr++; } @@ -1715,7 +1562,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } - cmd->rq = rq; cmd->error = BLK_STS_OK; cmd->nq = nq; cmd->fake_timeout = should_timeout_request(rq) || @@ -1774,22 +1620,6 @@ static void null_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } -static void cleanup_queue(struct nullb_queue *nq) -{ - bitmap_free(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { struct nullb_queue *nq = hctx->driver_data; @@ -1800,8 +1630,6 @@ static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; nq->dev = nullb->dev; INIT_LIST_HEAD(&nq->poll_list); spin_lock_init(&nq->poll_lock); @@ -1853,14 +1681,13 @@ static void null_del_dev(struct nullb *nullb) if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { hrtimer_cancel(&nullb->bw_timer); atomic_long_set(&nullb->cur_bytes, LONG_MAX); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); } put_disk(nullb->disk); - if (dev->queue_mode == NULL_Q_MQ && - nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - cleanup_queues(nullb); + kfree(nullb->queues); if 
(null_cache_active(nullb)) null_free_device_storage(nullb->dev, true); kfree(nullb); @@ -1887,40 +1714,11 @@ static void null_config_discard(struct nullb *nullb) blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); } -static const struct block_device_operations null_bio_ops = { +static const struct block_device_operations null_ops = { .owner = THIS_MODULE, - .submit_bio = null_submit_bio, .report_zones = null_report_zones, }; -static const struct block_device_operations null_rq_ops = { - .owner = THIS_MODULE, - .report_zones = null_report_zones, -}; - -static int setup_commands(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - int i; - - nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); - if (!nq->cmds) - return -ENOMEM; - - nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL); - if (!nq->tag_map) { - kfree(nq->cmds); - return -ENOMEM; - } - - for (i = 0; i < nq->queue_depth; i++) { - cmd = &nq->cmds[i]; - cmd->tag = -1U; - } - - return 0; -} - static int setup_queues(struct nullb *nullb) { int nqueues = nr_cpu_ids; @@ -1937,24 +1735,6 @@ static int setup_queues(struct nullb *nullb) return 0; } -static int init_driver_queues(struct nullb *nullb) -{ - struct nullb_queue *nq; - int i, ret = 0; - - for (i = 0; i < nullb->dev->submit_queues; i++) { - nq = &nullb->queues[i]; - - null_init_queue(nullb, nq); - - ret = setup_commands(nq); - if (ret) - return ret; - nullb->nr_queues++; - } - return 0; -} - static int null_gendisk_register(struct nullb *nullb) { sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; @@ -1965,10 +1745,7 @@ static int null_gendisk_register(struct nullb *nullb) disk->major = null_major; disk->first_minor = nullb->index; disk->minors = 1; - if (queue_is_mq(nullb->q)) - disk->fops = &null_rq_ops; - else - disk->fops = &null_bio_ops; + disk->fops = &null_ops; disk->private_data = nullb; strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); @@ -2036,11 +1813,15 @@ static int null_validate_conf(struct 
nullb_device *dev) pr_err("legacy IO path is no longer available\n"); return -EINVAL; } + if (dev->queue_mode == NULL_Q_BIO) { + pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n"); + dev->queue_mode = NULL_Q_MQ; + } dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); - if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { + if (dev->use_per_node_hctx) { if (dev->submit_queues != nr_online_nodes) dev->submit_queues = nr_online_nodes; } else if (dev->submit_queues > nr_cpu_ids) @@ -2052,8 +1833,6 @@ static int null_validate_conf(struct nullb_device *dev) if (dev->poll_queues > g_poll_queues) dev->poll_queues = g_poll_queues; dev->prev_poll_queues = dev->poll_queues; - - dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); /* Do memory allocation, so set blocking */ @@ -2064,9 +1843,6 @@ static int null_validate_conf(struct nullb_device *dev) dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, dev->cache_size); dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); - /* can not stop a queue */ - if (dev->queue_mode == NULL_Q_BIO) - dev->mbps = 0; if (dev->zoned && (!dev->zone_size || !is_power_of_2(dev->zone_size))) { @@ -2127,44 +1903,31 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_free_nullb; - if (dev->queue_mode == NULL_Q_MQ) { - if (dev->shared_tags) { - if (!tag_set.ops) { - rv = null_init_tag_set(NULL, &tag_set); - if (rv) { - tag_set.ops = NULL; - goto out_cleanup_queues; - } + if (dev->shared_tags) { + if (!tag_set.ops) { + rv = null_init_tag_set(NULL, &tag_set); + if (rv) { + tag_set.ops = NULL; + goto out_cleanup_queues; } - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); } + nullb->tag_set = &tag_set; + rv = 0; + } else { + nullb->tag_set = &nullb->__tag_set; 
+ rv = null_init_tag_set(nullb, nullb->tag_set); + } - if (rv) - goto out_cleanup_queues; - - nullb->tag_set->timeout = 5 * HZ; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); - if (IS_ERR(nullb->disk)) { - rv = PTR_ERR(nullb->disk); - goto out_cleanup_tags; - } - nullb->q = nullb->disk->queue; - } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->disk = blk_alloc_disk(NULL, nullb->dev->home_node); - if (IS_ERR(nullb->disk)) { - rv = PTR_ERR(nullb->disk); - goto out_cleanup_queues; - } + if (rv) + goto out_cleanup_queues; - nullb->q = nullb->disk->queue; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_disk; + nullb->tag_set->timeout = 5 * HZ; + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); + goto out_cleanup_tags; } + nullb->q = nullb->disk->queue; if (dev->mbps) { set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); @@ -2232,10 +1995,10 @@ out_cleanup_zone: out_cleanup_disk: put_disk(nullb->disk); out_cleanup_tags: - if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); out_cleanup_queues: - cleanup_queues(nullb); + kfree(nullb->queues); out_free_nullb: kfree(nullb); dev->nullb = NULL; @@ -2311,7 +2074,7 @@ static int __init null_init(void) return -EINVAL; } - if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { + if (g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("submit_queues param is set to %u.\n", nr_online_nodes); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 7bcfc0922ae8..7c618d53d8fd 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -16,11 +16,6 @@ #include struct nullb_cmd { - union { - struct request *rq; - struct bio *bio; - }; - unsigned int tag; blk_status_t error; bool fake_timeout; struct nullb_queue *nq; @@ -28,16 +23,11 @@ struct nullb_cmd { 
}; struct nullb_queue { - unsigned long *tag_map; - wait_queue_head_t wait; - unsigned int queue_depth; struct nullb_device *dev; unsigned int requeue_selection; struct list_head poll_list; spinlock_t poll_lock; - - struct nullb_cmd *cmds; }; struct nullb_zone { @@ -60,13 +50,6 @@ struct nullb_zone { unsigned int capacity; }; -/* Queue modes */ -enum { - NULL_Q_BIO = 0, - NULL_Q_RQ = 1, - NULL_Q_MQ = 2, -}; - struct nullb_device { struct nullb *nullb; struct config_group group; diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h index 6b2b370e786f..ef2d05d5f0df 100644 --- a/drivers/block/null_blk/trace.h +++ b/drivers/block/null_blk/trace.h @@ -41,10 +41,11 @@ TRACE_EVENT(nullb_zone_op, __field(unsigned int, zone_cond) ), TP_fast_assign( - __entry->op = req_op(cmd->rq); + __entry->op = req_op(blk_mq_rq_from_pdu(cmd)); __entry->zone_no = zone_no; __entry->zone_cond = zone_cond; - __assign_disk_name(__entry->disk, cmd->rq->q->disk); + __assign_disk_name(__entry->disk, + blk_mq_rq_from_pdu(cmd)->q->disk); ), TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", __print_disk_name(__entry->disk), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 6f5e0994862e..3605afe105da 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -168,10 +168,7 @@ int null_register_zoned_dev(struct nullb *nullb) disk_set_max_open_zones(nullb->disk, dev->zone_max_open); disk_set_max_active_zones(nullb->disk, dev->zone_max_active); - if (queue_is_mq(q)) - return blk_revalidate_disk_zones(nullb->disk, NULL); - - return 0; + return blk_revalidate_disk_zones(nullb->disk, NULL); } void null_free_zoned_dev(struct nullb_device *dev) @@ -394,10 +391,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, */ if (append) { sector = zone->wp; - if (dev->queue_mode == NULL_Q_MQ) - cmd->rq->__sector = sector; - else - cmd->bio->bi_iter.bi_sector = sector; + blk_mq_rq_from_pdu(cmd)->__sector = 
sector; } else if (sector != zone->wp) { ret = BLK_STS_IOERR; goto unlock; -- cgit v1.2.3 From e32b0855367b65095823b4427aad3da7c6a771a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:45 +0100 Subject: null_blk: initialize the tag_set timeout in null_init_tag_set Otherwise it will be reset to the always same value when initializing a device using the shared tag_set. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d6836327eefb..89e63d1c6103 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1797,6 +1797,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->nr_hw_queues = hw_queues; set->queue_depth = queue_depth; set->numa_node = numa_node; + set->timeout = 5 * HZ; if (poll_queues) { set->nr_hw_queues += poll_queues; set->nr_maps = 3; @@ -1921,7 +1922,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); -- cgit v1.2.3 From 72ca28765fc461c1aeb87372359ec0cfd609448b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:46 +0100 Subject: null_blk: refactor tag_set setup Move the tagset initialization out of null_add_dev into a new null_setup_tagset helper, and move the shared vs local differences out of null_init_tag_set into the callers. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 106 ++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 89e63d1c6103..03c3917a56fa 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1759,55 +1759,65 @@ static int null_gendisk_register(struct nullb *nullb) return add_disk(disk); } -static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) +static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues) { - unsigned int flags = BLK_MQ_F_SHOULD_MERGE; - int hw_queues, numa_node; - unsigned int queue_depth; - int poll_queues; - - if (nullb) { - hw_queues = nullb->dev->submit_queues; - poll_queues = nullb->dev->poll_queues; - queue_depth = nullb->dev->hw_queue_depth; - numa_node = nullb->dev->home_node; - if (nullb->dev->no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (nullb->dev->shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (nullb->dev->blocking) - flags |= BLK_MQ_F_BLOCKING; - } else { - hw_queues = g_submit_queues; - poll_queues = g_poll_queues; - queue_depth = g_hw_queue_depth; - numa_node = g_home_node; - if (g_no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (g_shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (g_blocking) - flags |= BLK_MQ_F_BLOCKING; - } - set->ops = &null_mq_ops; - set->cmd_size = sizeof(struct nullb_cmd); - set->flags = flags; - set->driver_data = nullb; - set->nr_hw_queues = hw_queues; - set->queue_depth = queue_depth; - set->numa_node = numa_node; + set->cmd_size = sizeof(struct nullb_cmd); set->timeout = 5 * HZ; + set->nr_maps = 1; if (poll_queues) { set->nr_hw_queues += poll_queues; - 
set->nr_maps = 3; - } else { - set->nr_maps = 1; + set->nr_maps += 2; } - return blk_mq_alloc_tag_set(set); } +static int null_init_global_tag_set(void) +{ + int error; + + if (tag_set.ops) + return 0; + + tag_set.nr_hw_queues = g_submit_queues; + tag_set.queue_depth = g_hw_queue_depth; + tag_set.numa_node = g_home_node; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (g_no_sched) + tag_set.flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (g_blocking) + tag_set.flags |= BLK_MQ_F_BLOCKING; + + error = null_init_tag_set(&tag_set, g_poll_queues); + if (error) + tag_set.ops = NULL; + return error; +} + +static int null_setup_tagset(struct nullb *nullb) +{ + if (nullb->dev->shared_tags) { + nullb->tag_set = &tag_set; + return null_init_global_tag_set(); + } + + nullb->tag_set = &nullb->__tag_set; + nullb->tag_set->driver_data = nullb; + nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues; + nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth; + nullb->tag_set->numa_node = nullb->dev->home_node; + nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE; + if (nullb->dev->no_sched) + nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED; + if (nullb->dev->shared_tag_bitmap) + nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (nullb->dev->blocking) + nullb->tag_set->flags |= BLK_MQ_F_BLOCKING; + return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues); +} + static int null_validate_conf(struct nullb_device *dev) { if (dev->queue_mode == NULL_Q_RQ) { @@ -1904,21 +1914,7 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_free_nullb; - if (dev->shared_tags) { - if (!tag_set.ops) { - rv = null_init_tag_set(NULL, &tag_set); - if (rv) { - tag_set.ops = NULL; - goto out_cleanup_queues; - } - } - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); - } - + rv = null_setup_tagset(nullb); if (rv) goto out_cleanup_queues; -- 
cgit v1.2.3 From 0a39e550c18244cdb9c4e671266a2a1d682d15c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:47 +0100 Subject: null_blk: remove null_gendisk_register null_gendisk_register isn't a very useful abstraction given that it doesn't even allocate the gendisk. Merge it into the only caller instead. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 03c3917a56fa..0c8d50423213 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1735,30 +1735,6 @@ static int setup_queues(struct nullb *nullb) return 0; } -static int null_gendisk_register(struct nullb *nullb) -{ - sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; - struct gendisk *disk = nullb->disk; - - set_capacity(disk, size); - - disk->major = null_major; - disk->first_minor = nullb->index; - disk->minors = 1; - disk->fops = &null_ops; - disk->private_data = nullb; - strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); - - if (nullb->dev->zoned) { - int ret = null_register_zoned_dev(nullb); - - if (ret) - return ret; - } - - return add_disk(disk); -} - static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues) { set->ops = &null_mq_ops; @@ -1972,7 +1948,22 @@ static int null_add_dev(struct nullb_device *dev) sprintf(nullb->disk_name, "nullb%d", nullb->index); } - rv = null_gendisk_register(nullb); + set_capacity(nullb->disk, + ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT); + nullb->disk->major = null_major; + nullb->disk->first_minor = nullb->index; + nullb->disk->minors = 1; + 
nullb->disk->fops = &null_ops; + nullb->disk->private_data = nullb; + strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + + if (nullb->dev->zoned) { + rv = null_register_zoned_dev(nullb); + if (rv) + goto out_ida_free; + } + + rv = add_disk(nullb->disk); if (rv) goto out_ida_free; -- cgit v1.2.3 From e440626b1caf3767eda2d78610dfdc0ae7fd5238 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:48 +0100 Subject: null_blk: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 41 +++++++++++++++++++-------------------- drivers/block/null_blk/null_blk.h | 4 ++-- drivers/block/null_blk/zoned.c | 15 +++++++------- 3 files changed, 29 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 0c8d50423213..a0b726c8366c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1694,7 +1694,7 @@ static void null_del_dev(struct nullb *nullb) dev->nullb = NULL; } -static void null_config_discard(struct nullb *nullb) +static void null_config_discard(struct nullb *nullb, struct queue_limits *lim) { if (nullb->dev->discard == false) return; @@ -1711,7 +1711,7 @@ static void null_config_discard(struct nullb *nullb) return; } - blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); + lim->max_hw_discard_sectors = UINT_MAX >> 9; } static const struct block_device_operations null_ops = { @@ -1869,6 +1869,12 @@ static bool null_setup_fault(void) static int null_add_dev(struct nullb_device *dev) { + struct queue_limits lim = { + .logical_block_size = dev->blocksize, + .physical_block_size = 
dev->blocksize, + .max_hw_sectors = dev->max_sectors, + }; + struct nullb *nullb; int rv; @@ -1894,10 +1900,19 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); + if (dev->virt_boundary) + lim.virt_boundary_mask = PAGE_SIZE - 1; + null_config_discard(nullb, &lim); + if (dev->zoned) { + rv = null_init_zoned_dev(dev, &lim); + if (rv) + goto out_cleanup_tags; + } + + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); - goto out_cleanup_tags; + goto out_cleanup_zone; } nullb->q = nullb->disk->queue; @@ -1911,12 +1926,6 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_write_cache(nullb->q, true, true); } - if (dev->zoned) { - rv = null_init_zoned_dev(dev, nullb->q); - if (rv) - goto out_cleanup_disk; - } - nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); @@ -1924,22 +1933,12 @@ static int null_add_dev(struct nullb_device *dev) rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) { mutex_unlock(&lock); - goto out_cleanup_zone; + goto out_cleanup_disk; } nullb->index = rv; dev->index = rv; mutex_unlock(&lock); - blk_queue_logical_block_size(nullb->q, dev->blocksize); - blk_queue_physical_block_size(nullb->q, dev->blocksize); - if (dev->max_sectors) - blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); - - if (dev->virt_boundary) - blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1); - - null_config_discard(nullb); - if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 7c618d53d8fd..25320fe34bfe 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -131,7 +131,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, 
unsigned int nr_sectors); #ifdef CONFIG_BLK_DEV_ZONED -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); +int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim); int null_register_zoned_dev(struct nullb *nullb); void null_free_zoned_dev(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, @@ -144,7 +144,7 @@ ssize_t zone_cond_store(struct nullb_device *dev, const char *page, size_t count, enum blk_zone_cond cond); #else static inline int null_init_zoned_dev(struct nullb_device *dev, - struct request_queue *q) + struct queue_limits *lim) { pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 3605afe105da..1689e2584104 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -58,7 +58,8 @@ static inline void null_unlock_zone(struct nullb_device *dev, mutex_unlock(&zone->mutex); } -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) +int null_init_zoned_dev(struct nullb_device *dev, + struct queue_limits *lim) { sector_t dev_capacity_sects, zone_capacity_sects; struct nullb_zone *zone; @@ -151,23 +152,21 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) sector += dev->zone_size_sects; } + lim->zoned = true; + lim->chunk_sectors = dev->zone_size_sects; + lim->max_zone_append_sectors = dev->zone_size_sects; + lim->max_open_zones = dev->zone_max_open; + lim->max_active_zones = dev->zone_max_active; return 0; } int null_register_zoned_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - disk_set_zoned(nullb->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); - blk_queue_chunk_sectors(q, dev->zone_size_sects); nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - 
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); - disk_set_max_open_zones(nullb->disk, dev->zone_max_open); - disk_set_max_active_zones(nullb->disk, dev->zone_max_active); - return blk_revalidate_disk_zones(nullb->disk, NULL); } -- cgit v1.2.3 From 34da27aa8956d3a75c7556a59c9c7cfd0b3f18ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Feb 2024 09:46:19 +0100 Subject: irqchip/imx-intmux: Handle pure domain searches correctly The removal of the parameter count restriction in the core code to allow pure domain token based select() decisions broke the IMX intmux select callback as that unconditionally expects that there is a parameter. Add the missing check for zero parameter count and the token match. Fixes: de1ff306dcf4 ("genirq/irqdomain: Remove the param count restriction from select()") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87ttm3ikok.ffs@tglx --- drivers/irqchip/irq-imx-intmux.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c index aa041e4dfee0..65084c7619b0 100644 --- a/drivers/irqchip/irq-imx-intmux.c +++ b/drivers/irqchip/irq-imx-intmux.c @@ -166,6 +166,10 @@ static int imx_intmux_irq_select(struct irq_domain *d, struct irq_fwspec *fwspec if (fwspec->fwnode != d->fwnode) return false; + /* Handle pure domain searches */ + if (!fwspec->param_count) + return d->bus_token == bus_token; + return irqchip_data->chanidx == fwspec->param[1]; } -- cgit v1.2.3 From 6f15e617cc99323339dc241d19956f0d640c4354 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 13 Feb 2024 21:35:16 -0600 Subject: RAS: Introduce a FRU memory poison manager Memory errors are an expected occurrence on systems with high memory density. Generally, errors within a small number of unique physical locations are acceptable, based on manufacturer and/or admin policy.
During run time, memory with errors may be retired so it is no longer used by the system. This is done in mm through page poisoning, and the effect will remain until the system is restarted. If a memory location is consistently faulty, then the same run time error handling may occur in the next reboot cycle, leading to terminating jobs due to that already known bad memory. This could be prevented if information from the previous boot was not lost. Some add-in cards with driver-managed memory have on-board persistent storage. Their driver saves memory error information to the persistent storage during run time. The information is then restored after reset, and known bad memory will be retired before the hardware is used. A running log of bad memory locations is kept across multiple resets. A similar solution is desirable for CPUs. However, this solution should leverage industry-standard components as much as possible, rather than a bespoke platform driver. Two components are needed: a record format and a persistent storage interface. Implement a new module to manage the record formats on persistent storage. Use the requirements for an AMD MI300-based system to start. Vendor- and platform-specific details can be abstracted later as needed. [ bp: Massage commit message and code, squash 30-ish more fixes from Yazen and me. 
] Signed-off-by: Yazen Ghannam Co-developed-by: Signed-off-by: Co-developed-by: Signed-off-by: Tested-by: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240214033516.1344948-3-yazen.ghannam@amd.com --- MAINTAINERS | 6 + drivers/ras/Kconfig | 12 + drivers/ras/Makefile | 1 + drivers/ras/amd/fmpm.c | 812 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 831 insertions(+) create mode 100644 drivers/ras/amd/fmpm.c (limited to 'drivers') diff --git a/MAINTAINERS b/MAINTAINERS index fc5996feba70..76163f09e4e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18363,6 +18363,12 @@ F: drivers/ras/ F: include/linux/ras.h F: include/ras/ras_event.h +RAS FRU MEMORY POISON MANAGER (FMPM) +M: Yazen Ghannam +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/ras/amd/fmpm.c + RC-CORE / LIRC FRAMEWORK M: Sean Young L: linux-media@vger.kernel.org diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index 2e969f59c0ca..fc4f4bb94a4c 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -34,4 +34,16 @@ if RAS source "arch/x86/ras/Kconfig" source "drivers/ras/amd/atl/Kconfig" +config RAS_FMPM + tristate "FRU Memory Poison Manager" + default m + depends on AMD_ATL && ACPI_APEI + help + Support saving and restoring memory error information across reboot + using ACPI ERST as persistent storage. Error information is saved with + the UEFI CPER "FRU Memory Poison" section format. + + Memory will be retired during boot time and run time depending on + platform-specific policies. 
+ endif diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 3fac80f58005..11f95d59d397 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -3,4 +3,5 @@ obj-$(CONFIG_RAS) += ras.o obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o +obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o obj-y += amd/atl/ diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c new file mode 100644 index 000000000000..80dd112b720a --- /dev/null +++ b/drivers/ras/amd/fmpm.c @@ -0,0 +1,812 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * FRU (Field-Replaceable Unit) Memory Poison Manager + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Authors: + * Naveen Krishna Chatradhi + * Muralidhara M K + * Yazen Ghannam + * + * Implementation notes, assumptions, and limitations: + * + * - FRU memory poison section and memory poison descriptor definitions are not yet + * included in the UEFI specification. So they are defined here. Afterwards, they + * may be moved to linux/cper.h, if appropriate. + * + * - Platforms based on AMD MI300 systems will be the first to use these structures. + * There are a number of assumptions made here that will need to be generalized + * to support other platforms. + * + * AMD MI300-based platform(s) assumptions: + * - Memory errors are reported through x86 MCA. + * - The entire DRAM row containing a memory error should be retired. + * - There will be (1) FRU memory poison section per CPER. + * - The FRU will be the CPU package (processor socket). + * - The default number of memory poison descriptor entries should be (8). + * - The platform will use ACPI ERST for persistent storage. + * - All FRU records should be saved to persistent storage. Module init will + * fail if any FRU record is not successfully written. + * + * - Boot time memory retirement may occur later than ideal due to dependencies + * on other libraries and drivers. 
This leaves a gap where bad memory may be + * accessed during early boot stages. + * + * - Enough memory should be pre-allocated for each FRU record to be able to hold + * the expected number of descriptor entries. This, mostly empty, record is + * written to storage during init time. Subsequent writes to the same record + * should allow the Platform to update the stored record in-place. Otherwise, + * if the record is extended, then the Platform may need to perform costly memory + * management operations on the storage. For example, the Platform may spend time + * in Firmware copying and invalidating memory on a relatively slow SPI ROM. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include + +#include +#include + +#define INVALID_CPU UINT_MAX + +/* Validation Bits */ +#define FMP_VALID_ARCH_TYPE BIT_ULL(0) +#define FMP_VALID_ARCH BIT_ULL(1) +#define FMP_VALID_ID_TYPE BIT_ULL(2) +#define FMP_VALID_ID BIT_ULL(3) +#define FMP_VALID_LIST_ENTRIES BIT_ULL(4) +#define FMP_VALID_LIST BIT_ULL(5) + +/* FRU Architecture Types */ +#define FMP_ARCH_TYPE_X86_CPUID_1_EAX 0 + +/* FRU ID Types */ +#define FMP_ID_TYPE_X86_PPIN 0 + +/* FRU Memory Poison Section */ +struct cper_sec_fru_mem_poison { + u32 checksum; + u64 validation_bits; + u32 fru_arch_type; + u64 fru_arch; + u32 fru_id_type; + u64 fru_id; + u32 nr_entries; +} __packed; + +/* FRU Descriptor ID Types */ +#define FPD_HW_ID_TYPE_MCA_IPID 0 + +/* FRU Descriptor Address Types */ +#define FPD_ADDR_TYPE_MCA_ADDR 0 + +/* Memory Poison Descriptor */ +struct cper_fru_poison_desc { + u64 timestamp; + u32 hw_id_type; + u64 hw_id; + u32 addr_type; + u64 addr; +} __packed; + +/* Collection of headers and sections for easy pointer use. */ +struct fru_rec { + struct cper_record_header hdr; + struct cper_section_descriptor sec_desc; + struct cper_sec_fru_mem_poison fmp; + struct cper_fru_poison_desc entries[]; +} __packed; + +/* + * Pointers to the complete CPER record of each FRU. 
+ * + * Memory allocation will include padded space for descriptor entries. + */ +static struct fru_rec **fru_records; + +#define CPER_CREATOR_FMP \ + GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ + 0xa0, 0x33, 0x08, 0x75) + +#define CPER_SECTION_TYPE_FMP \ + GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2, \ + 0x12, 0x0a, 0x44, 0x58) + +/** + * DOC: max_nr_entries (byte) + * Maximum number of descriptor entries possible for each FRU. + * + * Values between '1' and '255' are valid. + * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES. + */ +static u8 max_nr_entries; +module_param(max_nr_entries, byte, 0644); +MODULE_PARM_DESC(max_nr_entries, + "Maximum number of memory poison descriptor entries per FRU"); + +#define FMPM_DEFAULT_MAX_NR_ENTRIES 8 + +/* Maximum number of FRUs in the system. */ +#define FMPM_MAX_NR_FRU 256 +static unsigned int max_nr_fru; + +/* Total length of record including headers and list of descriptor entries. */ +static size_t max_rec_len; + +/* + * Protect the local records cache in fru_records and prevent concurrent + * writes to storage. This is only needed after init once notifier block + * registration is done. + */ +static DEFINE_MUTEX(fmpm_update_mutex); + +#define for_each_fru(i, rec) \ + for (i = 0; rec = fru_records[i], i < max_nr_fru; i++) + +static inline u32 get_fmp_len(struct fru_rec *rec) +{ + return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor); +} + +static struct fru_rec *get_fru_record(u64 fru_id) +{ + struct fru_rec *rec; + unsigned int i; + + for_each_fru(i, rec) { + if (rec->fmp.fru_id == fru_id) + return rec; + } + + pr_debug("Record not found for FRU 0x%016llx\n", fru_id); + + return NULL; +} + +/* + * Sum up all bytes within the FRU Memory Poison Section including the Memory + * Poison Descriptor entries. + * + * Don't include the old checksum here. It's a u32 value, so summing each of its + * bytes will give the wrong total.
+ */ +static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len) +{ + u32 checksum = 0; + u8 *buf, *end; + + /* Skip old checksum. */ + buf = (u8 *)fmp + sizeof(u32); + end = buf + len; + + while (buf < end) + checksum += (u8)(*(buf++)); + + return checksum; +} + +static int update_record_on_storage(struct fru_rec *rec) +{ + u32 len, checksum; + int ret; + + /* Calculate a new checksum. */ + len = get_fmp_len(rec); + + /* Get the current total. */ + checksum = do_fmp_checksum(&rec->fmp, len); + + /* Use the complement value. */ + rec->fmp.checksum = -checksum; + + pr_debug("Writing to storage\n"); + + ret = erst_write(&rec->hdr); + if (ret) { + pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id); + + if (ret == -ENOSPC) + pr_warn("Not enough space on storage\n"); + } + + return ret; +} + +static bool rec_has_valid_entries(struct fru_rec *rec) +{ + if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES)) + return false; + + if (!(rec->fmp.validation_bits & FMP_VALID_LIST)) + return false; + + return true; +} + +static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) +{ + /* + * Ignore timestamp field. + * The same physical error may be reported multiple times due to stuck bits, etc. + * + * Also, order the checks from most->least likely to fail to shortcut the code. 
+ */ + if (old->addr != new->addr) + return false; + + if (old->hw_id != new->hw_id) + return false; + + if (old->addr_type != new->addr_type) + return false; + + if (old->hw_id_type != new->hw_id_type) + return false; + + return true; +} + +static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) +{ + unsigned int i; + + for (i = 0; i < rec->fmp.nr_entries; i++) { + struct cper_fru_poison_desc *fpd_i = &rec->entries[i]; + + if (fpds_equal(fpd_i, fpd)) { + pr_debug("Found duplicate record\n"); + return true; + } + } + + return false; +} + +static void update_fru_record(struct fru_rec *rec, struct mce *m) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + struct cper_fru_poison_desc fpd, *fpd_dest; + u32 entry = 0; + + mutex_lock(&fmpm_update_mutex); + + memset(&fpd, 0, sizeof(struct cper_fru_poison_desc)); + + fpd.timestamp = m->time; + fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID; + fpd.hw_id = m->ipid; + fpd.addr_type = FPD_ADDR_TYPE_MCA_ADDR; + fpd.addr = m->addr; + + /* This is the first entry, so just save it. */ + if (!rec_has_valid_entries(rec)) + goto save_fpd; + + /* Ignore already recorded errors. 
*/ + if (rec_has_fpd(rec, &fpd)) + goto out_unlock; + + if (rec->fmp.nr_entries >= max_nr_entries) { + pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id); + goto out_unlock; + } + + entry = fmp->nr_entries; + +save_fpd: + fpd_dest = &rec->entries[entry]; + memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); + + fmp->nr_entries = entry + 1; + fmp->validation_bits |= FMP_VALID_LIST_ENTRIES; + fmp->validation_bits |= FMP_VALID_LIST; + + pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry); + + update_record_on_storage(rec); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +} + +static void retire_dram_row(u64 addr, u64 id, u32 cpu) +{ + struct atl_err a_err; + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + amd_retire_dram_row(&a_err); +} + +static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *m = (struct mce *)data; + struct fru_rec *rec; + + if (!mce_is_memory_error(m)) + return NOTIFY_DONE; + + retire_dram_row(m->addr, m->ipid, m->extcpu); + + /* + * An invalid FRU ID should not happen on real errors. But it + * could happen from software error injection, etc. 
+ */ + rec = get_fru_record(m->ppin); + if (!rec) + return NOTIFY_DONE; + + update_fru_record(rec, m); + + return NOTIFY_OK; +} + +static struct notifier_block fru_mem_poison_nb = { + .notifier_call = fru_handle_mem_poison, + .priority = MCE_PRIO_LOWEST, +}; + +static void retire_mem_fmp(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + unsigned int i, cpu; + + for (i = 0; i < fmp->nr_entries; i++) { + struct cper_fru_poison_desc *fpd = &rec->entries[i]; + unsigned int err_cpu = INVALID_CPU; + + if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID) + continue; + + if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR) + continue; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_ppin(cpu) == fmp->fru_id) { + err_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (err_cpu == INVALID_CPU) + continue; + + retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); + } +} + +static void retire_mem_records(void) +{ + struct fru_rec *rec; + unsigned int i; + + for_each_fru(i, rec) { + if (!rec_has_valid_entries(rec)) + continue; + + retire_mem_fmp(rec); + } +} + +/* Set the CPER Record Header and CPER Section Descriptor fields. */ +static void set_rec_fields(struct fru_rec *rec) +{ + struct cper_section_descriptor *sec_desc = &rec->sec_desc; + struct cper_record_header *hdr = &rec->hdr; + + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + hdr->revision = CPER_RECORD_REV; + hdr->signature_end = CPER_SIG_END; + + /* + * Currently, it is assumed that there is one FRU Memory Poison + * section per CPER. But this may change for other implementations. + */ + hdr->section_count = 1; + + /* The logged errors are recoverable. Otherwise, they'd never make it here. 
*/ + hdr->error_severity = CPER_SEV_RECOVERABLE; + + hdr->validation_bits = 0; + hdr->record_length = max_rec_len; + hdr->creator_id = CPER_CREATOR_FMP; + hdr->notification_type = CPER_NOTIFY_MCE; + hdr->record_id = cper_next_record_id(); + hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; + + sec_desc->section_offset = sizeof(struct cper_record_header); + sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); + sec_desc->revision = CPER_SEC_REV; + sec_desc->validation_bits = 0; + sec_desc->flags = CPER_SEC_PRIMARY; + sec_desc->section_type = CPER_SECTION_TYPE_FMP; + sec_desc->section_severity = CPER_SEV_RECOVERABLE; +} + +static int save_new_records(void) +{ + DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU); + struct fru_rec *rec; + unsigned int i; + int ret = 0; + + for_each_fru(i, rec) { + if (rec->hdr.record_length) + continue; + + set_rec_fields(rec); + + ret = update_record_on_storage(rec); + if (ret) + goto out_clear; + + set_bit(i, new_records); + } + + return ret; + +out_clear: + for_each_fru(i, rec) { + if (!test_bit(i, new_records)) + continue; + + erst_clear(rec->hdr.record_id); + } + + return ret; +} + +/* Check that the record matches expected types for the current system.*/ +static bool fmp_is_usable(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + u64 cpuid; + + pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits); + + if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) { + pr_debug("Arch type unknown\n"); + return false; + } + + if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) { + pr_debug("Arch type not 'x86 Family/Model/Stepping'\n"); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ARCH)) { + pr_debug("Arch value unknown\n"); + return false; + } + + cpuid = cpuid_eax(1); + if (fmp->fru_arch != cpuid) { + pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n", + fmp->fru_arch, cpuid); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) { + 
pr_debug("FRU ID type unknown\n"); + return false; + } + + if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) { + pr_debug("FRU ID type is not 'x86 PPIN'\n"); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ID)) { + pr_debug("FRU ID value unknown\n"); + return false; + } + + return true; +} + +static bool fmp_is_valid(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + u32 checksum, len; + + len = get_fmp_len(rec); + if (len < sizeof(struct cper_sec_fru_mem_poison)) { + pr_debug("fmp length is too small\n"); + return false; + } + + /* Checksum must sum to zero for the entire section. */ + checksum = do_fmp_checksum(fmp, len) + fmp->checksum; + if (checksum) { + pr_debug("fmp checksum failed: sum = 0x%x\n", checksum); + print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false); + return false; + } + + if (!fmp_is_usable(rec)) + return false; + + return true; +} + +static struct fru_rec *get_valid_record(struct fru_rec *old) +{ + struct fru_rec *new; + + if (!fmp_is_valid(old)) { + pr_debug("Ignoring invalid record\n"); + return NULL; + } + + new = get_fru_record(old->fmp.fru_id); + if (!new) + pr_debug("Ignoring record for absent FRU\n"); + + return new; +} + +/* + * Fetch saved records from persistent storage. + * + * For each found record: + * - If it was not created by this module, then ignore it. + * - If it is valid, then copy its data to the local cache. + * - If it is not valid, then erase it. + */ +static int get_saved_records(void) +{ + struct fru_rec *old, *new; + u64 record_id; + int ret, pos; + ssize_t len; + + /* + * Assume saved records match current max size. + * + * However, this may not be true depending on module parameters. 
+ */ + old = kmalloc(max_rec_len, GFP_KERNEL); + if (!old) { + ret = -ENOMEM; + goto out; + } + + ret = erst_get_record_id_begin(&pos); + if (ret < 0) + goto out_end; + + while (!erst_get_record_id_next(&pos, &record_id)) { + if (record_id == APEI_ERST_INVALID_RECORD_ID) + goto out_end; + /* + * Make sure to clear temporary buffer between reads to avoid + * leftover data from records of various sizes. + */ + memset(old, 0, max_rec_len); + + len = erst_read_record(record_id, &old->hdr, max_rec_len, + sizeof(struct fru_rec), &CPER_CREATOR_FMP); + if (len < 0) + continue; + + if (len > max_rec_len) { + pr_debug("Found record larger than max_rec_len\n"); + continue; + } + + new = get_valid_record(old); + if (!new) + erst_clear(record_id); + + /* Restore the record */ + memcpy(new, old, len); + } + +out_end: + erst_get_record_id_end(); + kfree(old); +out: + return ret; +} + +static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + + fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX; + fmp->validation_bits |= FMP_VALID_ARCH_TYPE; + + /* Assume all CPUs in the system have the same value for now. */ + fmp->fru_arch = cpuid_eax(1); + fmp->validation_bits |= FMP_VALID_ARCH; + + fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN; + fmp->validation_bits |= FMP_VALID_ID_TYPE; + + fmp->fru_id = topology_ppin(cpu); + fmp->validation_bits |= FMP_VALID_ID; +} + +static int init_fmps(void) +{ + struct fru_rec *rec; + unsigned int i, cpu; + int ret = 0; + + for_each_fru(i, rec) { + unsigned int fru_cpu = INVALID_CPU; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_physical_package_id(cpu) == i) { + fru_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (fru_cpu == INVALID_CPU) { + pr_debug("Failed to find matching CPU for FRU #%u\n", i); + ret = -ENODEV; + break; + } + + set_fmp_fields(rec, fru_cpu); + } + + return ret; +} + +static int get_system_info(void) +{ + /* Only load on MI300A systems for now. 
*/ + if (!(boot_cpu_data.x86_model >= 0x90 && + boot_cpu_data.x86_model <= 0x9f)) + return -ENODEV; + + if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) { + pr_debug("PPIN feature not available\n"); + return -ENODEV; + } + + /* Use CPU socket as FRU for MI300 systems. */ + max_nr_fru = topology_max_packages(); + if (!max_nr_fru) + return -ENODEV; + + if (max_nr_fru > FMPM_MAX_NR_FRU) { + pr_warn("Too many FRUs to manage: found: %u, max: %u\n", + max_nr_fru, FMPM_MAX_NR_FRU); + return -ENODEV; + } + + if (!max_nr_entries) + max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES; + + max_rec_len = sizeof(struct fru_rec); + max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; + + pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n", + max_nr_fru, max_nr_entries, max_rec_len); + + return 0; +} + +static void free_records(void) +{ + struct fru_rec *rec; + int i; + + for_each_fru(i, rec) + kfree(rec); + + kfree(fru_records); +} + +static int allocate_records(void) +{ + int i, ret = 0; + + fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL); + if (!fru_records) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < max_nr_fru; i++) { + fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL); + if (!fru_records[i]) { + ret = -ENOMEM; + goto out_free; + } + } + + return ret; + +out_free: + for (; i >= 0; i--) + kfree(fru_records[i]); + + kfree(fru_records); +out: + return ret; +} + +static const struct x86_cpu_id fmpm_cpuids[] = { + X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids); + +static int __init fru_mem_poison_init(void) +{ + int ret; + + if (!x86_match_cpu(fmpm_cpuids)) { + ret = -ENODEV; + goto out; + } + + if (erst_disable) { + pr_debug("ERST not available\n"); + ret = -ENODEV; + goto out; + } + + ret = get_system_info(); + if (ret) + goto out; + + ret = allocate_records(); + if (ret) + goto out; + + ret = init_fmps(); + if (ret) + goto out_free; + + ret = get_saved_records(); + if (ret) + 
goto out_free; + + ret = save_new_records(); + if (ret) + goto out_free; + + retire_mem_records(); + + mce_register_decode_chain(&fru_mem_poison_nb); + + pr_info("FRU Memory Poison Manager initialized\n"); + return 0; + +out_free: + free_records(); +out: + return ret; +} + +static void __exit fru_mem_poison_exit(void) +{ + mce_unregister_decode_chain(&fru_mem_poison_nb); + free_records(); +} + +module_init(fru_mem_poison_init); +module_exit(fru_mem_poison_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FRU Memory Poison Manager"); -- cgit v1.2.3 From e8335ef57c6816d81b24173ba88cc9b3f043687f Mon Sep 17 00:00:00 2001 From: Yochai Hagvi Date: Thu, 25 Jan 2024 15:40:55 +0200 Subject: ice: fix connection state of DPLL and out pin Fix the connection state between source DPLL and output pin, updating the attribute 'state' of 'parent_device'. Previously, the connection state was broken, and didn't reflect the correct state. When 'state_on_dpll_set' is called with the value 'DPLL_PIN_STATE_CONNECTED' (1), the output pin will switch to the given DPLL, and the state of the given DPLL will be set to connected. E.g.: --do pin-set --json '{"id":2, "parent-device":{"parent-id":1, "state": 1 }}' This command will connect DPLL device with id 1 to output pin with id 2. When 'state_on_dpll_set' is called with the value 'DPLL_PIN_STATE_DISCONNECTED' (2) and the given DPLL is currently connected, then the output pin will be disabled. E.g: --do pin-set --json '{"id":2, "parent-device":{"parent-id":1, "state": 2 }}' This command will disable output pin with id 2 if DPLL device with ID 1 is connected to it; otherwise, the command is ignored. 
Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Reviewed-by: Wojciech Drewek Reviewed-by: Arkadiusz Kubalewski Signed-off-by: Yochai Hagvi Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 43 +++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index b9c5eced6326..9c0d739be1e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -254,6 +254,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, * ice_dpll_pin_enable - enable a pin on dplls * @hw: board private hw structure * @pin: pointer to a pin + * @dpll_idx: dpll index to connect to output pin * @pin_type: type of pin being enabled * @extack: error reporting * @@ -266,7 +267,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, */ static int ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, - enum ice_dpll_pin_type pin_type, + u8 dpll_idx, enum ice_dpll_pin_type pin_type, struct netlink_ext_ack *extack) { u8 flags = 0; @@ -280,10 +281,12 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0); break; case ICE_DPLL_PIN_TYPE_OUTPUT: + flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL; if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; - ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0); + ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, dpll_idx, + 0, 0); break; default: return -EINVAL; @@ -398,14 +401,27 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, break; case ICE_DPLL_PIN_TYPE_OUTPUT: ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx, - 
&pin->flags[0], NULL, + &pin->flags[0], &parent, &pin->freq, NULL); if (ret) goto err; - if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) - pin->state[0] = DPLL_PIN_STATE_CONNECTED; - else - pin->state[0] = DPLL_PIN_STATE_DISCONNECTED; + + parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL; + if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) { + pin->state[pf->dplls.eec.dpll_idx] = + parent == pf->dplls.eec.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + parent == pf->dplls.pps.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + } else { + pin->state[pf->dplls.eec.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + } break; case ICE_DPLL_PIN_TYPE_RCLK_INPUT: for (parent = 0; parent < pf->dplls.rclk.num_parents; @@ -570,7 +586,8 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv, mutex_lock(&pf->dplls.lock); if (enable) - ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack); + ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type, + extack); else ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack); if (!ret) @@ -603,6 +620,11 @@ ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv, struct netlink_ext_ack *extack) { bool enable = state == DPLL_PIN_STATE_CONNECTED; + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + + if (!enable && p->state[d->dpll_idx] == DPLL_PIN_STATE_DISCONNECTED) + return 0; return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable, extack, ICE_DPLL_PIN_TYPE_OUTPUT); @@ -669,10 +691,9 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv, ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); if (ret) goto unlock; - if (pin_type == ICE_DPLL_PIN_TYPE_INPUT) + if (pin_type == ICE_DPLL_PIN_TYPE_INPUT || + pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) *state = p->state[d->dpll_idx]; - else if (pin_type == 
ICE_DPLL_PIN_TYPE_OUTPUT) - *state = p->state[0]; ret = 0; unlock: mutex_unlock(&pf->dplls.lock); -- cgit v1.2.3 From 3b14430c65b4f510b2a310ca4f18ed6ca7184b00 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 8 Feb 2024 23:56:31 +0100 Subject: ice: fix dpll input pin phase_adjust value updates The value of phase_adjust for input pin shall be updated in ice_dpll_pin_state_update(..). Fix by adding proper argument to the firmware query function call - a pin's struct field pointer where the phase_adjust value during driver runtime is stored. Previously the phase_adjust used to misinform user about actual phase_adjust value. I.e., if phase_adjust was set to a non zero value and if driver was reloaded, the user would see the value equal 0, which is not correct - the actual value is equal to value set before driver reload. Fixes: 90e1c90750d7 ("ice: dpll: implement phase related callbacks") Reviewed-by: Alan Brady Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 9c0d739be1e9..2beaeb9c336d 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -373,7 +373,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, case ICE_DPLL_PIN_TYPE_INPUT: ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL, NULL, &pin->flags[0], - &pin->freq, NULL); + &pin->freq, &pin->phase_adjust); if (ret) goto err; if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) { -- cgit v1.2.3 From fc7fd1a10a9d2d38378b42e9a508da4c68018453 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 9 Feb 2024 22:24:30 +0100 Subject: ice: fix dpll and dpll_pin data access on PF reset Do not allow to acquire data 
or alter configuration of dpll and pins through firmware if PF reset is in progress; this would cause confusing netlink extack errors as the firmware cannot respond or process the request properly during the reset time. Return (-EBUSY) and an extack error for the user who tries to access/modify the config of dpll/pin through firmware during the reset time. The PF reset and kernel access to dpll data are both asynchronous. It is not possible to guard all the possible reset paths with any deterministic approach. I.e., it is possible that reset starts after reset check is performed (or if the reset would be checked after mutex is locked), but at the same time it is not possible to wait for dpll mutex unlock in the reset flow. This is a best-effort solution to at least give a clue to the user what is happening in most of the cases, knowing that there are possible race conditions where the user could see a different error received from firmware due to reset unexpectedly starting. Test by looping execution of below steps until netlink error appears: - perform PF reset $ echo 1 > /sys/class/net/<ifname>/device/reset - i.e.
try to alter/read dpll/pin config: $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml \ --dump pin-get Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 2beaeb9c336d..343b2a668959 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -30,6 +30,26 @@ static const char * const pin_type_name[] = { [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input", }; +/** + * ice_dpll_is_reset - check if reset is in progress + * @pf: private board structure + * @extack: error reporting + * + * If reset is in progress, fill extack with error. 
+ * + * Return: + * * false - no reset in progress + * * true - reset in progress + */ +static bool ice_dpll_is_reset(struct ice_pf *pf, struct netlink_ext_ack *extack) +{ + if (ice_is_reset_in_progress(pf->state)) { + NL_SET_ERR_MSG(extack, "PF reset in progress"); + return true; + } + return false; +} + /** * ice_dpll_pin_freq_set - set pin's frequency * @pf: private board structure @@ -109,6 +129,9 @@ ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack); mutex_unlock(&pf->dplls.lock); @@ -584,6 +607,9 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); if (enable) ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type, @@ -687,6 +713,9 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); if (ret) @@ -811,6 +840,9 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); mutex_unlock(&pf->dplls.lock); @@ -1090,6 +1122,9 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) @@ -1144,6 +1179,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, 
extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) -- cgit v1.2.3 From 9a8385fe14bcb250a3889e744dc54e9c411d8400 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 9 Feb 2024 22:24:31 +0100 Subject: ice: fix dpll periodic work data updates on PF reset Do not allow dpll periodic work function to acquire data from firmware if PF reset is in progress. Acquiring data will cause dmesg errors as the firmware cannot respond or process the request properly during the reset time. Test by looping execution of below step until dmesg error appears: - perform PF reset $ echo 1 > /sys/class/net//device/reset Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Reviewed-by: Igor Bagnucki Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 343b2a668959..395e10c246f7 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1364,8 +1364,10 @@ static void ice_dpll_periodic_work(struct kthread_work *work) struct ice_pf *pf = container_of(d, struct ice_pf, dplls); struct ice_dpll *de = &pf->dplls.eec; struct ice_dpll *dp = &pf->dplls.pps; - int ret; + int ret = 0; + if (ice_is_reset_in_progress(pf->state)) + goto resched; mutex_lock(&pf->dplls.lock); ret = ice_dpll_update_state(pf, de, false); if (!ret) @@ -1385,6 +1387,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work) ice_dpll_notify_changes(de); ice_dpll_notify_changes(dp); +resched: /* Run twice a second or reschedule if update failed */ kthread_queue_delayed_work(d->kworker, &d->work, ret ? 
msecs_to_jiffies(10) : -- cgit v1.2.3 From ee89921da471edcb4b1e67f5bbfedddf39749782 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 9 Feb 2024 22:24:32 +0100 Subject: ice: fix pin phase adjust updates on PF reset Do not allow to set phase adjust value for a pin if PF reset is in progress, this would cause confusing netlink extack errors as the firmware cannot process the request properly during the reset time. Return (-EBUSY) and report extack error for the user who tries configure pin phase adjust during the reset time. Test by looping execution of below steps until netlink error appears: - perform PF reset $ echo 1 > /sys/class/net//device/reset - change pin phase adjust value: $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml \ --do pin-set --json '{"id":0, "phase-adjust":1000}' Fixes: 90e1c90750d7 ("ice: dpll: implement phase related callbacks") Reviewed-by: Igor Bagnucki Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 395e10c246f7..adfa1f2a80a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -963,6 +963,9 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, u8 flag, flags_en = 0; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); switch (type) { case ICE_DPLL_PIN_TYPE_INPUT: -- cgit v1.2.3 From 080b0c8d6d261b400f24bb1075fbab8c6daaf69e Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Tue, 13 Feb 2024 11:48:50 -0800 Subject: ice: Fix ASSERT_RTNL() warning during certain scenarios Commit 91fdbce7e8d6 ("ice: Add support in the driver for associating queue with napi") invoked the netif_queue_set_napi() call. 
This kernel function requires to be called with rtnl_lock taken, otherwise ASSERT_RTNL() warning will be triggered. ice_vsi_rebuild() initiating this call is under rtnl_lock when the rebuild is in response to configuration changes from external interfaces (such as tc, ethtool etc. which holds the lock). But, the VSI rebuild generated from service tasks and resets (PFR/CORER/GLOBR) is not under rtnl lock protection. Handle these cases as well to hold lock before the kernel call (by setting the 'locked' boolean to false). netif_queue_set_napi() is also used to clear previously set napi in the q_vector unroll flow. Handle this for locked/lockless execution paths. Fixes: 91fdbce7e8d6 ("ice: Add support in the driver for associating queue with napi") Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 10 ++-- drivers/net/ethernet/intel/ice/ice_lib.c | 86 +++++++++++++++++++++++++------ drivers/net/ethernet/intel/ice/ice_lib.h | 10 +++- drivers/net/ethernet/intel/ice/ice_main.c | 3 +- 4 files changed, 83 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 7ac847718882..c979192e44d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -190,15 +190,13 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) q_vector = vsi->q_vectors[v_idx]; ice_for_each_tx_ring(tx_ring, q_vector->tx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, NULL); + ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX, + NULL); tx_ring->q_vector = NULL; } ice_for_each_rx_ring(rx_ring, q_vector->rx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, NULL); + ice_queue_set_napi(vsi, 
rx_ring->q_index, NETDEV_QUEUE_TYPE_RX, + NULL); rx_ring->q_vector = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 9be724291ef8..097bf8fd6bf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2426,7 +2426,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) ice_vsi_map_rings_to_vectors(vsi); /* Associate q_vector rings to napi */ - ice_vsi_set_napi_queues(vsi, true); + ice_vsi_set_napi_queues(vsi); vsi->stat_offsets_loaded = false; @@ -2904,19 +2904,19 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) } /** - * ice_queue_set_napi - Set the napi instance for the queue + * __ice_queue_set_napi - Set the napi instance for the queue * @dev: device to which NAPI and queue belong * @queue_index: Index of queue * @type: queue type as RX or TX * @napi: NAPI context * @locked: is the rtnl_lock already held * - * Set the napi instance for the queue + * Set the napi instance for the queue. Caller indicates the lock status. */ static void -ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi, - bool locked) +__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi, + bool locked) { if (!locked) rtnl_lock(); @@ -2926,26 +2926,79 @@ ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, } /** - * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * ice_queue_set_napi - Set the napi instance for the queue + * @vsi: VSI being configured + * @queue_index: Index of queue + * @type: queue type as RX or TX + * @napi: NAPI context + * + * Set the napi instance for the queue. The rtnl lock state is derived from the + * execution path. 
+ */ +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi) +{ + struct ice_pf *pf = vsi->back; + + if (!vsi->netdev) + return; + + if (current_work() == &pf->serv_task || + test_bit(ICE_PREPARED_FOR_RESET, pf->state) || + test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_SUSPENDED, pf->state)) + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + false); + else + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + true); +} + +/** + * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi * @q_vector: q_vector pointer * @locked: is the rtnl_lock already held * + * Associate the q_vector napi with all the queue[s] on the vector. + * Caller indicates the lock status. + */ +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +{ + struct ice_rx_ring *rx_ring; + struct ice_tx_ring *tx_ring; + + ice_for_each_rx_ring(rx_ring, q_vector->rx) + __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, &q_vector->napi, + locked); + + ice_for_each_tx_ring(tx_ring, q_vector->tx) + __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi, + locked); + /* Also set the interrupt number for the NAPI */ + netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); +} + +/** + * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * @q_vector: q_vector pointer + * * Associate the q_vector napi with all the queue[s] on the vector */ -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector) { struct ice_rx_ring *rx_ring; struct ice_tx_ring *tx_ring; ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, 
&q_vector->napi); ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi); /* Also set the interrupt number for the NAPI */ netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); } @@ -2953,11 +3006,10 @@ void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) /** * ice_vsi_set_napi_queues * @vsi: VSI pointer - * @locked: is the rtnl_lock already held * * Associate queue[s] with napi for all vectors */ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { int i; @@ -2965,7 +3017,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) return; ice_for_each_q_vector(vsi, i) - ice_q_vector_set_napi_queues(vsi->q_vectors[i], locked); + ice_q_vector_set_napi_queues(vsi->q_vectors[i]); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 71bd27244941..bfcfc582a4c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -91,9 +91,15 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi); + +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); + +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector); -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked); +void ice_vsi_set_napi_queues(struct ice_vsi *vsi); int ice_vsi_release(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c 
b/drivers/net/ethernet/intel/ice/ice_main.c index dd4a9bc0dfdc..59c7e37f175f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3495,7 +3495,7 @@ static void ice_napi_add(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, ice_napi_poll); - ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); + __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); } } @@ -5447,6 +5447,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + ice_vsi_set_napi_queues(pf->vsi[v]); } ret = ice_req_irq_msix_misc(pf); -- cgit v1.2.3 From 78f65fbf421a61894c14a1b91fe2fb4437b3fe5f Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Feb 2024 19:51:47 +0200 Subject: wifi: iwlwifi: mvm: ensure offloading TID queue exists The resume code path assumes that the TX queue for the offloading TID has been configured. At resume time it then tries to sync the write pointer as it may have been updated by the firmware. In the unusual event that no packets have been send on TID 0, the queue will not have been allocated and this causes a crash. Fix this by ensuring the queue exist at suspend time. 
Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240218194912.6632e6dc7b35.Ie6e6a7488c9c7d4529f13d48f752b5439d8ac3c4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 9 ++++++++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 28 ++++++++++++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 3 ++- 3 files changed, 38 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 4582afb149d7..05b64176859e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1279,7 +1279,9 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, mvm->net_detect = true; } else { - struct iwl_wowlan_config_cmd wowlan_config_cmd = {}; + struct iwl_wowlan_config_cmd wowlan_config_cmd = { + .offloading_tid = 0, + }; wowlan_config_cmd.sta_id = mvmvif->deflink.ap_sta_id; @@ -1291,6 +1293,11 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, goto out_noreset; } + ret = iwl_mvm_sta_ensure_queue( + mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]); + if (ret) + goto out_noreset; + ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd, vif, mvmvif, ap_sta); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 2a3ca9785974..c2e0cff740e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1502,6 +1502,34 @@ out_err: return ret; } +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, + struct ieee80211_txq *txq) +{ + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + int ret = -EINVAL; + + lockdep_assert_held(&mvm->mutex); + + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { + return 0; + } + + if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) { + 
set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + ret = 0; + } + + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + if (!list_empty(&mvmtxq->list)) + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + local_bh_enable(); + + return ret; +} + void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) { struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index b33a0ce096d4..3cf8a70274ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2023 Intel Corporation + * Copyright (C) 2012-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2016 Intel Deutschland GmbH */ @@ -571,6 +571,7 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, bool disable); void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher, -- cgit v1.2.3 From d3433d1bb7bde449035f54b7000361ce151bad07 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 18 Feb 2024 19:51:50 +0200 Subject: wifi: iwlwifi: mvm: fix the TXF mapping for BZ devices Those devices' fifos are numbered differently. Because of that, we were looking at the size of the VO fifo to determine the size of the A-MSDU, which led to a lower throughput. Note that for those devices the only user of the AC -> fifo mapping is the size limitation of A-MSDU.
Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://msgid.link/20240218194912.da336ca2fa0a.I73e44d5fc474ebb6f275b9008950e59c012f33b2@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/api/txq.h | 12 +++++++++++- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 11 +++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 8 ++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h index 9c69d3674384..e6c0f928a6bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2019-2021, 2023 Intel Corporation + * Copyright (C) 2005-2014, 2019-2021, 2023-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -66,6 +66,16 @@ enum iwl_gen2_tx_fifo { IWL_GEN2_TRIG_TX_FIFO_VO, }; +enum iwl_bz_tx_fifo { + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_TRIG_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_VO, +}; /** * enum iwl_tx_queue_cfg_actions - TXQ config options * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index c4f96125cf33..25a5a31e63c2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -31,6 +31,17 @@ const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = { IWL_GEN2_TRIG_TX_FIFO_BK, }; +const u8 iwl_mvm_ac_to_bz_tx_fifo[] = { + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_VO, + 
IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_BK, +}; + struct iwl_mvm_mac_iface_iterator_data { struct iwl_mvm *mvm; struct ieee80211_vif *vif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 40627961b834..81dbef6947f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1581,12 +1581,16 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm, extern const u8 iwl_mvm_ac_to_tx_fifo[]; extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[]; +extern const u8 iwl_mvm_ac_to_bz_tx_fifo[]; static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm, enum ieee80211_ac_numbers ac) { - return iwl_mvm_has_new_tx_api(mvm) ? - iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac]; + if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) + return iwl_mvm_ac_to_bz_tx_fifo[ac]; + if (iwl_mvm_has_new_tx_api(mvm)) + return iwl_mvm_ac_to_gen2_tx_fifo[ac]; + return iwl_mvm_ac_to_tx_fifo[ac]; } struct iwl_rate_info { -- cgit v1.2.3 From eb0e1ebb1772302213f4882f5fada2b3f6362e66 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 20 Feb 2024 22:41:04 -0800 Subject: irqchip/vic: Fix a kernel-doc warning Drop one extraneous struct member to quieten a warning: drivers/irqchip/irq-vic.c:73: warning: Excess struct member 'parent_irq' description in 'vic_device' Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240221064104.7863-1-rdunlap@infradead.org --- drivers/irqchip/irq-vic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index 9e3d5561e04e..ea93e7236c4a 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -47,9 +47,8 @@ /** * struct vic_device - VIC PM device - * @parent_irq: The parent IRQ number of the VIC if cascaded, or 0. 
- * @irq: The IRQ number for the base of the VIC. * @base: The register base for the VIC. + * @irq: The IRQ number for the base of the VIC. * @valid_sources: A bitmask of valid interrupts * @resume_sources: A bitmask of interrupts for resume. * @resume_irqs: The IRQs enabled for resume. -- cgit v1.2.3 From e0359f1551b8d4a8d00704699c07fabb11a07cf1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Feb 2024 10:35:54 +0100 Subject: Revert "ACPI: EC: Use a spin lock without disabing interrupts" Commit eb9299beadbd ("ACPI: EC: Use a spin lock without disabing interrupts") introduced an unexpected user-visible change in behavior, which is a significant CPU load increase when the EC is in use. This most likely happens due to increased spinlock contention and so reducing this effect would require a major rework of the EC driver locking. There is no time for this in the current cycle, so revert commit eb9299beadbd. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218511 Reported-by: Dieter Mummenschanz Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ec.c | 112 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index dbdee2924594..02255795b800 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -525,10 +525,12 @@ static void acpi_ec_clear(struct acpi_ec *ec) static void acpi_ec_enable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) __acpi_ec_enable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* Drain additional events if hardware requires that */ if (EC_FLAGS_CLEAR_ON_RESUME) @@ -544,9 +546,11 @@ static void __acpi_ec_flush_work(void) static void acpi_ec_disable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); __acpi_ec_disable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* * When ec_freeze_events is true, we need to flush events in @@ -567,9 +571,10 @@ void acpi_ec_flush_work(void) static bool acpi_ec_guard_event(struct acpi_ec *ec) { + unsigned long flags; bool guarded; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* * If firmware SCI_EVT clearing timing is "event", we actually * don't know when the SCI_EVT will be cleared by firmware after @@ -585,29 +590,31 @@ static bool acpi_ec_guard_event(struct acpi_ec *ec) guarded = ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec->event_state != EC_EVENT_READY && (!ec->curr || ec->curr->command != ACPI_EC_COMMAND_QUERY); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return guarded; } static int ec_transaction_polled(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_POLL)) ret = 1; - spin_unlock(&ec->lock); + 
spin_unlock_irqrestore(&ec->lock, flags); return ret; } static int ec_transaction_completed(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE)) ret = 1; - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return ret; } @@ -749,6 +756,7 @@ static int ec_guard(struct acpi_ec *ec) static int ec_poll(struct acpi_ec *ec) { + unsigned long flags; int repeat = 5; /* number of command restarts */ while (repeat--) { @@ -757,14 +765,14 @@ static int ec_poll(struct acpi_ec *ec) do { if (!ec_guard(ec)) return 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); advance_transaction(ec, false); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } while (time_before(jiffies, delay)); pr_debug("controller reset, restart transaction\n"); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } return -ETIME; } @@ -772,10 +780,11 @@ static int ec_poll(struct acpi_ec *ec) static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, struct transaction *t) { + unsigned long tmp; int ret = 0; /* start transaction */ - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); /* Enable GPE for command processing (IBF=0/OBF=1) */ if (!acpi_ec_submit_flushable_request(ec)) { ret = -EINVAL; @@ -786,11 +795,11 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, ec->curr = t; ec_dbg_req("Command(%s) started", acpi_ec_cmd_string(t->command)); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); ret = ec_poll(ec); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); if (t->irq_count == ec_storm_threshold) acpi_ec_unmask_events(ec); ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command)); @@ -799,7 +808,7 @@ static int 
acpi_ec_transaction_unlocked(struct acpi_ec *ec, acpi_ec_complete_request(ec); ec_dbg_ref(ec, "Decrease command"); unlock: - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); return ret; } @@ -927,7 +936,9 @@ EXPORT_SYMBOL(ec_get_handle); static void acpi_ec_start(struct acpi_ec *ec, bool resuming) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (!test_and_set_bit(EC_FLAGS_STARTED, &ec->flags)) { ec_dbg_drv("Starting EC"); /* Enable GPE for event processing (SCI_EVT=1) */ @@ -937,28 +948,31 @@ static void acpi_ec_start(struct acpi_ec *ec, bool resuming) } ec_log_drv("EC started"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static bool acpi_ec_stopped(struct acpi_ec *ec) { + unsigned long flags; bool flushed; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); flushed = acpi_ec_flushed(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return flushed; } static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) { ec_dbg_drv("Stopping EC"); set_bit(EC_FLAGS_STOPPED, &ec->flags); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); wait_event(ec->wait, acpi_ec_stopped(ec)); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* Disable GPE for event processing (SCI_EVT=1) */ if (!suspending) { acpi_ec_complete_request(ec); @@ -969,25 +983,29 @@ static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) clear_bit(EC_FLAGS_STOPPED, &ec->flags); ec_log_drv("EC stopped"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_enter_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = true; ec->polling_guard = 0; ec_log_drv("interrupt blocked"); - spin_unlock(&ec->lock); + 
spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_leave_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = ec_busy_polling; ec->polling_guard = ec_polling_guard; ec_log_drv("interrupt unblocked"); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } void acpi_ec_block_transactions(void) @@ -1119,9 +1137,9 @@ static void acpi_ec_event_processor(struct work_struct *work) ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_put_query_handler(handler); kfree(q); @@ -1184,12 +1202,12 @@ static int acpi_ec_submit_query(struct acpi_ec *ec) */ ec_dbg_evt("Query(0x%02x) scheduled", value); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress++; queue_work(ec_query_wq, &q->work); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); return 0; @@ -1205,14 +1223,14 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event started"); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); while (ec->events_to_process) { - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_submit_query(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->events_to_process--; } @@ -1229,11 +1247,11 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event stopped"); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); guard_timeout = !!ec_guard(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); /* Take care of SCI_EVT unless someone else is doing that. 
*/ if (guard_timeout && !ec->curr) @@ -1246,7 +1264,7 @@ static void acpi_ec_event_handler(struct work_struct *work) ec->events_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); } static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt) @@ -1271,11 +1289,13 @@ static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt static void acpi_ec_handle_interrupt(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); clear_gpe_and_advance_transaction(ec, true); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static u32 acpi_ec_gpe_handler(acpi_handle gpe_device, @@ -2085,7 +2105,7 @@ bool acpi_ec_dispatch_gpe(void) * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. */ - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); if (acpi_ec_gpe_status_set(first_ec)) { pm_pr_dbg("ACPI EC GPE status set\n"); @@ -2094,7 +2114,7 @@ bool acpi_ec_dispatch_gpe(void) work_in_progress = acpi_ec_work_in_progress(first_ec); } - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); if (!work_in_progress) return false; @@ -2107,11 +2127,11 @@ bool acpi_ec_dispatch_gpe(void) pm_pr_dbg("ACPI EC work flushed\n"); - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); work_in_progress = acpi_ec_work_in_progress(first_ec); - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); } while (work_in_progress && !pm_wakeup_pending()); return false; -- cgit v1.2.3 From 32ce3bb57b6b402de2aec1012511e7ac4e7449dc Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Thu, 22 Feb 2024 11:12:29 +0100 Subject: spi: cadence-qspi: fix pointer reference in runtime PM hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_get_drvdata() gets used to acquire the pointer to cqspi and the SPI controller. 
Neither embeds the other; this leads to memory corruption. On a given platform (Mobileye EyeQ5) the memory corruption is hidden inside cqspi->f_pdata. Also, this uninitialised memory is used as a mutex (ctlr->bus_lock_mutex) by spi_controller_suspend(). Fixes: 2087e85bb66e ("spi: cadence-quadspi: fix suspend-resume implementations") Reviewed-by: Dhruva Gole Signed-off-by: Théo Lebrun Link: https://msgid.link/r/20240222-cdns-qspi-pm-fix-v4-1-6b6af8bcbf59@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index f94e0d370d46..0d184d65dce7 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1930,10 +1930,9 @@ static void cqspi_remove(struct platform_device *pdev) static int cqspi_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); int ret; - ret = spi_controller_suspend(host); + ret = spi_controller_suspend(cqspi->host); cqspi_controller_enable(cqspi, 0); clk_disable_unprepare(cqspi->clk); @@ -1944,7 +1943,6 @@ static int cqspi_suspend(struct device *dev) static int cqspi_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); clk_prepare_enable(cqspi->clk); cqspi_wait_idle(cqspi); @@ -1953,7 +1951,7 @@ static int cqspi_resume(struct device *dev) cqspi->current_cs = -1; cqspi->sclk = 0; - return spi_controller_resume(host); + return spi_controller_resume(cqspi->host); } static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, -- cgit v1.2.3 From 959043afe53ae80633e810416cee6076da6e91c6 Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Thu, 22 Feb 2024 11:12:30 +0100 Subject: spi: cadence-qspi: remove system-wide suspend helper calls from runtime PM hooks MIME-Version: 1.0 Content-Type:
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ->runtime_suspend() and ->runtime_resume() callbacks are not expected to call spi_controller_suspend() and spi_controller_resume(). Remove calls to those in the cadence-qspi driver. Those helpers have two roles currently: - They stop/start the queue, including dealing with the kworker. - They toggle the SPI controller SPI_CONTROLLER_SUSPENDED flag. It requires acquiring ctlr->bus_lock_mutex. Step one is irrelevant because cadence-qspi is not queued. Step two however has two implications: - A deadlock occurs, because ->runtime_resume() is called in a context where the lock is already taken (in the ->exec_op() callback, where the usage count is incremented). - It would disallow all operations once the device is auto-suspended. Here is a brief call tree highlighting the mutex deadlock: spi_mem_exec_op() ... spi_mem_access_start() mutex_lock(&ctlr->bus_lock_mutex) cqspi_exec_mem_op() pm_runtime_resume_and_get() cqspi_resume() spi_controller_resume() mutex_lock(&ctlr->bus_lock_mutex) ... spi_mem_access_end() mutex_unlock(&ctlr->bus_lock_mutex) ... 
Fixes: 0578a6dbfe75 ("spi: spi-cadence-quadspi: add runtime pm support") Signed-off-by: Théo Lebrun Link: https://msgid.link/r/20240222-cdns-qspi-pm-fix-v4-2-6b6af8bcbf59@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 0d184d65dce7..731775d34d39 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1930,14 +1930,10 @@ static void cqspi_remove(struct platform_device *pdev) static int cqspi_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - int ret; - ret = spi_controller_suspend(cqspi->host); cqspi_controller_enable(cqspi, 0); - clk_disable_unprepare(cqspi->clk); - - return ret; + return 0; } static int cqspi_resume(struct device *dev) @@ -1950,8 +1946,7 @@ static int cqspi_resume(struct device *dev) cqspi->current_cs = -1; cqspi->sclk = 0; - - return spi_controller_resume(cqspi->host); + return 0; } static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, -- cgit v1.2.3 From 4efa1250b59ebf47ce64a7b6b7c3e2e0a2a9d35a Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Thu, 22 Feb 2024 11:12:31 +0100 Subject: spi: cadence-qspi: put runtime in runtime PM hooks names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow kernel naming convention with regards to power-management callback function names. The convention in the kernel is: - prefix_suspend means the system-wide suspend callback; - prefix_runtime_suspend means the runtime PM suspend callback. The same applies to resume callbacks. 
Signed-off-by: Théo Lebrun Reviewed-by: Dhruva Gole Link: https://msgid.link/r/20240222-cdns-qspi-pm-fix-v4-3-6b6af8bcbf59@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 731775d34d39..4b2d42e54f56 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1927,7 +1927,7 @@ static void cqspi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); } -static int cqspi_suspend(struct device *dev) +static int cqspi_runtime_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); @@ -1936,7 +1936,7 @@ static int cqspi_suspend(struct device *dev) return 0; } -static int cqspi_resume(struct device *dev) +static int cqspi_runtime_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); @@ -1949,8 +1949,8 @@ static int cqspi_resume(struct device *dev) return 0; } -static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, - cqspi_resume, NULL); +static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_runtime_suspend, + cqspi_runtime_resume, NULL); static const struct cqspi_driver_platdata cdns_qspi = { .quirks = CQSPI_DISABLE_DAC_MODE, -- cgit v1.2.3 From 078d62de433b4f4556bb676e5dd670f0d4103376 Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Thu, 22 Feb 2024 11:12:32 +0100 Subject: spi: cadence-qspi: add system-wide suspend and resume callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each SPI controller is expected to call the spi_controller_suspend() and spi_controller_resume() callbacks at system-wide suspend and resume. It (1) handles the kthread worker for queued controllers and (2) marks the controller as suspended to have spi_sync() fail while the controller is unavailable. 
Those two operations do not require the controller to be active, we do not need to increment the runtime PM usage counter. Signed-off-by: Théo Lebrun Link: https://msgid.link/r/20240222-cdns-qspi-pm-fix-v4-4-6b6af8bcbf59@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 4b2d42e54f56..1a8d03958dff 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1949,8 +1949,24 @@ static int cqspi_runtime_resume(struct device *dev) return 0; } -static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_runtime_suspend, - cqspi_runtime_resume, NULL); +static int cqspi_suspend(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_suspend(cqspi->host); +} + +static int cqspi_resume(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_resume(cqspi->host); +} + +static const struct dev_pm_ops cqspi_dev_pm_ops = { + RUNTIME_PM_OPS(cqspi_runtime_suspend, cqspi_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(cqspi_suspend, cqspi_resume) +}; static const struct cqspi_driver_platdata cdns_qspi = { .quirks = CQSPI_DISABLE_DAC_MODE, -- cgit v1.2.3 From d3ea125df37dc37972d581b74a5d3785c3f283ab Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 9 Feb 2024 11:14:12 -0800 Subject: dmaengine: idxd: Ensure safe user copy of completion record If CONFIG_HARDENED_USERCOPY is enabled, copying completion record from event log cache to user triggers a kernel bug. [ 1987.159822] usercopy: Kernel memory exposure attempt detected from SLUB object 'dsa0' (offset 74, size 31)! [ 1987.170845] ------------[ cut here ]------------ [ 1987.176086] kernel BUG at mm/usercopy.c:102! 
[ 1987.180946] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 1987.186866] CPU: 17 PID: 528 Comm: kworker/17:1 Not tainted 6.8.0-rc2+ #5 [ 1987.194537] Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 [ 1987.206405] Workqueue: wq0.0 idxd_evl_fault_work [idxd] [ 1987.212338] RIP: 0010:usercopy_abort+0x72/0x90 [ 1987.217381] Code: 58 65 9c 50 48 c7 c2 17 85 61 9c 57 48 c7 c7 98 fd 6b 9c 48 0f 44 d6 48 c7 c6 b3 08 62 9c 4c 89 d1 49 0f 44 f3 e8 1e 2e d5 ff <0f> 0b 49 c7 c1 9e 42 61 9c 4c 89 cf 4d 89 c8 eb a9 66 66 2e 0f 1f [ 1987.238505] RSP: 0018:ff62f5cf20607d60 EFLAGS: 00010246 [ 1987.244423] RAX: 000000000000005f RBX: 000000000000001f RCX: 0000000000000000 [ 1987.252480] RDX: 0000000000000000 RSI: ffffffff9c61429e RDI: 00000000ffffffff [ 1987.260538] RBP: ff62f5cf20607d78 R08: ff2a6a89ef3fffe8 R09: 00000000fffeffff [ 1987.268595] R10: ff2a6a89eed00000 R11: 0000000000000003 R12: ff2a66934849c89a [ 1987.276652] R13: 0000000000000001 R14: ff2a66934849c8b9 R15: ff2a66934849c899 [ 1987.284710] FS: 0000000000000000(0000) GS:ff2a66b22fe40000(0000) knlGS:0000000000000000 [ 1987.293850] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1987.300355] CR2: 00007fe291a37000 CR3: 000000010fbd4005 CR4: 0000000000f71ef0 [ 1987.308413] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1987.316470] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 1987.324527] PKRU: 55555554 [ 1987.327622] Call Trace: [ 1987.330424] [ 1987.332826] ? show_regs+0x6e/0x80 [ 1987.336703] ? die+0x3c/0xa0 [ 1987.339988] ? do_trap+0xd4/0xf0 [ 1987.343662] ? do_error_trap+0x75/0xa0 [ 1987.347922] ? usercopy_abort+0x72/0x90 [ 1987.352277] ? exc_invalid_op+0x57/0x80 [ 1987.356634] ? usercopy_abort+0x72/0x90 [ 1987.360988] ? asm_exc_invalid_op+0x1f/0x30 [ 1987.365734] ? 
usercopy_abort+0x72/0x90 [ 1987.370088] __check_heap_object+0xb7/0xd0 [ 1987.374739] __check_object_size+0x175/0x2d0 [ 1987.379588] idxd_copy_cr+0xa9/0x130 [idxd] [ 1987.384341] idxd_evl_fault_work+0x127/0x390 [idxd] [ 1987.389878] process_one_work+0x13e/0x300 [ 1987.394435] ? __pfx_worker_thread+0x10/0x10 [ 1987.399284] worker_thread+0x2f7/0x420 [ 1987.403544] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 1987.409171] ? __pfx_worker_thread+0x10/0x10 [ 1987.414019] kthread+0x107/0x140 [ 1987.417693] ? __pfx_kthread+0x10/0x10 [ 1987.421954] ret_from_fork+0x3d/0x60 [ 1987.426019] ? __pfx_kthread+0x10/0x10 [ 1987.430281] ret_from_fork_asm+0x1b/0x30 [ 1987.434744] The issue arises because event log cache is created using kmem_cache_create() which is not suitable for user copy. Fix the issue by creating event log cache with kmem_cache_create_usercopy(), ensuring safe user copy. Fixes: c2f156bf168f ("dmaengine: idxd: create kmem cache for event log fault items") Reported-by: Tony Zhu Tested-by: Tony Zhu Signed-off-by: Fenghua Yu Reviewed-by: Lijun Pan Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240209191412.1050270-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 14df1f1347a8..4954adc6bb60 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -343,7 +343,9 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) static int idxd_init_evl(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; + unsigned int evl_cache_size; struct idxd_evl *evl; + const char *idxd_name; if (idxd->hw.gen_cap.evl_support == 0) return 0; @@ -355,9 +357,16 @@ static int idxd_init_evl(struct idxd_device *idxd) spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; - idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), - sizeof(struct 
idxd_evl_fault) + evl_ent_size(idxd), - 0, 0, NULL); + idxd_name = dev_name(idxd_confdev(idxd)); + evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd); + /* + * Since completion record in evl_cache will be copied to user + * when handling completion record page fault, need to create + * the cache suitable for user copy. + */ + idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size, + 0, 0, 0, evl_cache_size, + NULL); if (!idxd->evl_cache) { kfree(evl); return -ENOMEM; -- cgit v1.2.3 From e281bfa6f759b667357454c1d3d283f7a52871da Mon Sep 17 00:00:00 2001 From: Huqiang Qin Date: Thu, 22 Feb 2024 15:46:38 +0800 Subject: irqchip/meson-gpio: Add support for Amlogic-T7 SoCs The Amlogic-T7 SoCs support 12 GPIO IRQ lines compared with previous serial chips and have something different, details are as below. IRQ Number: - 156 1 pin on bank TESTN - 155:148 8 pins on bank H - 147:129 19 pins on bank Y - 128:115 14 pins on bank M - 114:91 24 pins on bank T - 90:77 14 pins on bank Z - 76:70 7 pins on bank E - 69:57 13 pins on bank D - 56:40 17 pins on bank W - 39:20 20 pins on bank X - 19:13 7 pins on bank C - 12:0 13 pins on bank B Signed-off-by: Huqiang Qin Signed-off-by: Thomas Gleixner Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240222074640.1866284-3-huqiang.qin@amlogic.com --- drivers/irqchip/irq-meson-gpio.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index f88df39f4129..9a1791908598 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -154,6 +154,10 @@ static const struct meson_gpio_irq_params c3_params = { INIT_MESON_S4_COMMON_DATA(55) }; +static const struct meson_gpio_irq_params t7_params = { + INIT_MESON_S4_COMMON_DATA(157) +}; + static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = { { .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params }, { 
.compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params }, @@ -165,6 +169,7 @@ static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = { { .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params }, { .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params }, { .compatible = "amlogic,c3-gpio-intc", .data = &c3_params }, + { .compatible = "amlogic,t7-gpio-intc", .data = &t7_params }, { } }; -- cgit v1.2.3 From 6f420d6a2dd8d4a795eab2839b0e08663269f9f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2024 08:36:46 +0100 Subject: pktcdvd: stop setting q->queuedata The two users can get the private data from the gendisk with one less pointer dereference, and we can drop the useless q parameter from pkt_make_request_write. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240222073647.3776769-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index abb82926b1c9..0cd65b27c197 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2338,9 +2338,9 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) pkt_queue_bio(pd, cloned_bio); } -static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +static void pkt_make_request_write(struct bio *bio) { - struct pktcdvd_device *pd = q->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; sector_t zone; struct packet_data *pkt; int was_empty, blocked_bio; @@ -2432,7 +2432,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) static void pkt_submit_bio(struct bio *bio) { - struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; struct device *ddev = disk_to_dev(pd->disk); struct bio *split; @@ -2476,7 
+2476,7 @@ static void pkt_submit_bio(struct bio *bio) split = bio; } - pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + pkt_make_request_write(split); } while (split != bio); return; @@ -2490,7 +2490,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd) blk_queue_logical_block_size(q, CD_FRAMESIZE); blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); - q->queuedata = pd; } static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) -- cgit v1.2.3 From 4068550870360410261638479ffaf8364c366dd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2024 08:36:47 +0100 Subject: pktcdvd: set queue limits at disk allocation time Remove pkt_init_queue and just pass the two parameters directly to blk_alloc_disk. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240222073647.3776769-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0cd65b27c197..12fcc881b04f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2484,14 +2484,6 @@ end_io: bio_io_error(bio); } -static void pkt_init_queue(struct pktcdvd_device *pd) -{ - struct request_queue *q = pd->disk->queue; - - blk_queue_logical_block_size(q, CD_FRAMESIZE); - blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); -} - static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { struct device *ddev = disk_to_dev(pd->disk); @@ -2535,8 +2527,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) pd->bdev_handle = bdev_handle; set_blocksize(bdev_handle->bdev, CD_FRAMESIZE); - pkt_init_queue(pd); - atomic_set(&pd->cdrw.pending_bios, 0); pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name); if (IS_ERR(pd->cdrw.thread)) { @@ -2633,6 +2623,10 @@ static const struct block_device_operations pktcdvd_ops = { */ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) { + 
struct queue_limits lim = { + .max_hw_sectors = PACKET_MAX_SECTORS, + .logical_block_size = CD_FRAMESIZE, + }; int idx; int ret = -ENOMEM; struct pktcdvd_device *pd; @@ -2672,7 +2666,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_on = write_congestion_on; pd->write_congestion_off = write_congestion_off; - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_mem; -- cgit v1.2.3 From 0f225f87873ee95dd4cf94dfc6a3249d4289e4ea Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 22 Feb 2024 08:34:20 +0000 Subject: null_blk: Delete nullb.{queue_depth, nr_queues} Since commit 8b631f9cf0b8 ("null_blk: remove the bio based I/O path"), struct nullb members queue_depth and nr_queues are only ever written, so delete them. With that, null_exit_hctx() can also be deleted. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240222083420.6026-1-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 11 ----------- drivers/block/null_blk/null_blk.h | 2 -- 2 files changed, 13 deletions(-) (limited to 'drivers') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index a0b726c8366c..71c39bcd872c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1620,14 +1620,6 @@ static void null_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } -static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nullb_queue *nq = hctx->driver_data; - struct nullb *nullb = nq->dev->nullb; - - nullb->nr_queues--; -} - static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { nq->dev = nullb->dev; @@ -1647,7 +1639,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; null_init_queue(nullb, nq); - nullb->nr_queues++; return 0; } @@ -1660,7 +1651,6 @@ static const struct 
blk_mq_ops null_mq_ops = { .poll = null_poll, .map_queues = null_map_queues, .init_hctx = null_init_hctx, - .exit_hctx = null_exit_hctx, }; static void null_del_dev(struct nullb *nullb) @@ -1731,7 +1721,6 @@ static int setup_queues(struct nullb *nullb) if (!nullb->queues) return -ENOMEM; - nullb->queue_depth = nullb->dev->hw_queue_depth; return 0; } diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 25320fe34bfe..477b97746823 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -114,14 +114,12 @@ struct nullb { struct gendisk *disk; struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set __tag_set; - unsigned int queue_depth; atomic_long_t cur_bytes; struct hrtimer bw_timer; unsigned long cache_flush_pos; spinlock_t lock; struct nullb_queue *queues; - unsigned int nr_queues; char disk_name[DISK_NAME_LEN]; }; -- cgit v1.2.3 From 1fa8d07ae1a5fa4e87de42c338e8fc27f46d8bb6 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 22 Feb 2024 03:05:16 +0200 Subject: gpu: host1x: Skip reset assert on Tegra186 On Tegra186, secure world applications may need to access host1x during suspend/resume, and rely on the kernel to keep Host1x out of reset during the suspend cycle. As such, as a quirk, skip asserting Host1x's reset on Tegra186. We don't need to keep the clocks enabled, as BPMP ensures the clock stays on while Host1x is being used. On newer SoC's, the reset line is inaccessible, so there is no need for the quirk. 
Fixes: b7c00cdf6df5 ("gpu: host1x: Enable system suspend callbacks") Signed-off-by: Mikko Perttunen Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240222010517.1573931-1-cyndis@kapsi.fi --- drivers/gpu/host1x/dev.c | 15 +++++++++------ drivers/gpu/host1x/dev.h | 6 ++++++ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 42fd504abbcd..89983d7d73ca 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) host1x_intr_stop(host); host1x_syncpt_save(host); - err = reset_control_bulk_assert(host->nresets, host->resets); - if (err) { - dev_err(dev, "failed to assert reset: %d\n", err); - goto resume_host1x; - } + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } - usleep_range(1000, 2000); + usleep_range(1000, 2000); + } clk_disable_unprepare(host->clk); reset_control_bulk_release(host->nresets, host->resets); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index c8e302de7625..925a118db23f 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -116,6 +116,12 @@ struct host1x_info { * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. 
+ */ + bool skip_reset_assert; }; struct host1x { -- cgit v1.2.3 From 45532b21dc2a692444b6ad5f71c253cca53e8103 Mon Sep 17 00:00:00 2001 From: Andre Werner Date: Mon, 19 Feb 2024 06:33:32 +0100 Subject: net: smsc95xx: add support for SYS TEC USB-SPEmodule1 This patch adds support for the SYS TEC USB-SPEmodule1 10Base-T1L ethernet device to the existing smsc95xx driver by adding the new USB VID/PID pair. Signed-off-by: Andre Werner Link: https://lore.kernel.org/r/20240219053413.4732-1-andre.werner@systec-electronic.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc95xx.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee257..2fa46baa589e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2104,6 +2104,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9E08), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), -- cgit v1.2.3 From 1878840a0328dac1c85d29fee31456ec26fcc01c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 19 Feb 2024 10:59:39 -0500 Subject: dmaengine: fsl-qdma: add __iomem and struct in union to fix sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix below sparse warnings. 
drivers/dma/fsl-qdma.c:645:50: sparse: warning: incorrect type in argument 2 (different address spaces) drivers/dma/fsl-qdma.c:645:50: sparse: expected void [noderef] __iomem *addr drivers/dma/fsl-qdma.c:645:50: sparse: got void drivers/dma/fsl-qdma.c:387:15: sparse: sparse: restricted __le32 degrades to integer drivers/dma/fsl-qdma.c:390:19: sparse: expected restricted __le64 [usertype] data drivers/dma/fsl-qdma.c:392:13: sparse: expected unsigned int [assigned] [usertype] cmd QDMA descriptors have the 3 formats below (little endian). Compound Command Descriptor Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┼─┴─┴─┼─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │DD │ - │QUEUE│ - │ ADDR │ ├──────┼───┴─────┴─────┴───────────────────────────────┴───────────────┤ │ 0x08 │ ADDR │ ├──────┼─────┬─────────────────┬───────────────────────────────────────┤ │ 0x04 │ FMT │ OFFSET │ - │ ├──────┼─┬─┬─┴─────────────────┴───────────────────────┬───────────────┤ │ │ │S│ │ │ │ 0x00 │-│E│ - │ STATUS │ │ │ │R│ │ │ └──────┴─┴─┴───────────────────────────────────────────┴───────────────┘ Compound S/G Table Entry Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │ - │ ADDR │ ├──────┼───────────────────────────────────────────────┴───────────────┤ │ 0x08 │ ADDR │ ├──────┼─┬─┬───────────────────────────────────────────────────────────┤ │ 0x04 │E│F│ LENGTH │ ├──────┼─┴─┴─────────────────────────────────┬─────────────────────────┤ │ 0x00 │ - │ OFFSET │ └──────┴─────────────────────────────────────┴─────────────────────────┘ Source/Destination
Descriptor Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │ CMD │ ├──────┼───────────────────────────────────────────────────────────────┤ │ 0x08 │ - │ ├──────┼───────────────┬───────────────────────┬───────────────────────┤ │ 0x04 │ - │ S[D]SS │ S[D]SD │ ├──────┼───────────────┴───────────────────────┴───────────────────────┤ │ 0x00 │ - │ └──────┴───────────────────────────────────────────────────────────────┘ The previous code used the 64-bit 'data' field to map 0x8 and 0xC. On a little-endian system CMD is the high part of the 64-bit 'data', so a left shift by 32 is correct. But on a big-endian system, a left shift by 32 writes to the 0x8 position. Sparse detected this problem. Add the field below to match the 'Source/Destination Descriptor Format'. struct { __le32 __reserved2; __le32 cmd; } __packed; Using ddf(sdf)->cmd saves to the correct position regardless of endianness.
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402081929.mggOTHaZ-lkp@intel.com/ Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240219155939.611237-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 1e3bf6f30f78..5005e138fc23 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -161,6 +161,10 @@ struct fsl_qdma_format { u8 __reserved1[2]; u8 cfg8b_w1; } __packed; + struct { + __le32 __reserved2; + __le32 cmd; + } __packed; __le64 data; }; } __packed; @@ -355,7 +359,6 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan) static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, dma_addr_t dst, dma_addr_t src, u32 len) { - u32 cmd; struct fsl_qdma_format *sdf, *ddf; struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest; @@ -384,15 +387,11 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, /* This entry is the last entry. 
*/ qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET) | - FSL_QDMA_CMD_PF; - sdf->data = QDMA_SDDF_CMD(cmd); - - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); - cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET); - ddf->data = QDMA_SDDF_CMD(cmd); + sdf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF); + + ddf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + (FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET)); } /* @@ -626,7 +625,7 @@ static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, - void *block, + __iomem void *block, int id) { bool duplicate; -- cgit v1.2.3 From df2515a17914ecfc2a0594509deaf7fcb8d191ac Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 22 Feb 2024 17:30:53 +0100 Subject: dmaengine: ptdma: use consistent DMA masks The PTDMA driver sets DMA masks in two different places for the same device inconsistently. First call is in pt_pci_probe(), where it uses 48bit mask. The second call is in pt_dmaengine_register(), where it uses a 64bit mask. Using 64bit dma mask causes IO_PAGE_FAULT errors on DMA transfers between main memory and other devices. Without the extra call it works fine. Additionally the second call doesn't check the return value so it can silently fail. Remove the superfluous dma_set_mask() call and only use 48bit mask. 
Cc: stable@vger.kernel.org Fixes: b0b4a6b10577 ("dmaengine: ptdma: register PTDMA controller as a DMA resource") Reviewed-by: Basavaraj Natikar Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20240222163053.13842-1-tstruk@gigaio.com Signed-off-by: Vinod Koul --- drivers/dma/ptdma/ptdma-dmaengine.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3..f79240734807 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; -- cgit v1.2.3 From 96303bcb401c21dc1426d8d9bb1fc74aae5c02a9 Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 22 Feb 2024 16:39:38 +0800 Subject: irqchip/riscv-intc: Allow large non-standard interrupt number Currently, the implementation of the RISC-V INTC driver uses the interrupt cause as the hardware interrupt number, with a maximum of 64 interrupts. However, the platform can expand the interrupt number further for custom local interrupts. To fully utilize the available local interrupt sources, switch to using irq_domain_create_tree() that creates the radix tree map, add global variables (riscv_intc_nr_irqs, riscv_intc_custom_base and riscv_intc_custom_nr_irqs) to determine the valid range of local interrupt number (hwirq). 
Signed-off-by: Yu Chien Peter Lin Signed-off-by: Thomas Gleixner Reviewed-by: Randolph Reviewed-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20240222083946.3977135-3-peterlin@andestech.com --- drivers/irqchip/irq-riscv-intc.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index e8d01b14ccdd..684875c39728 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -19,15 +19,16 @@ #include static struct irq_domain *intc_domain; +static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG; +static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG; +static unsigned int riscv_intc_custom_nr_irqs __ro_after_init; static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; - if (unlikely(cause >= BITS_PER_LONG)) - panic("unexpected interrupt cause"); - - generic_handle_domain_irq(intc_domain, cause); + if (generic_handle_domain_irq(intc_domain, cause)) + pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause); } /* @@ -93,6 +94,14 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain, if (ret) return ret; + /* + * Only allow hwirq for which we have corresponding standard or + * custom interrupt enable register. 
+ */ + if ((hwirq >= riscv_intc_nr_irqs && hwirq < riscv_intc_custom_base) || + (hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs)) + return -EINVAL; + for (i = 0; i < nr_irqs; i++) { ret = riscv_intc_domain_map(domain, virq + i, hwirq + i); if (ret) @@ -117,8 +126,7 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) { int rc; - intc_domain = irq_domain_create_linear(fn, BITS_PER_LONG, - &riscv_intc_domain_ops, NULL); + intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, NULL); if (!intc_domain) { pr_err("unable to add IRQ domain\n"); return -ENXIO; @@ -132,7 +140,11 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) riscv_set_intc_hwnode_fn(riscv_intc_hwnode); - pr_info("%d local interrupts mapped\n", BITS_PER_LONG); + pr_info("%d local interrupts mapped\n", riscv_intc_nr_irqs); + if (riscv_intc_custom_nr_irqs) { + pr_info("%d custom local interrupts mapped\n", + riscv_intc_custom_nr_irqs); + } return 0; } -- cgit v1.2.3 From f4cc33e78ba8624a79ba8dea98ce5c85aa9ca33c Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 22 Feb 2024 16:39:39 +0800 Subject: irqchip/riscv-intc: Introduce Andes hart-level interrupt controller Add support for the Andes hart-level interrupt controller. This controller provides interrupt mask/unmask functions to access the custom register (SLIE) where the non-standard S-mode local interrupt enable bits are located. The base of custom interrupt number is set to 256. To share the riscv_intc_domain_map() with the generic RISC-V INTC and ACPI, add a chip parameter to riscv_intc_init_common(), so it can be passed to the irq_domain_set_info() as a private data. Andes hart-level interrupt controller requires the "andestech,cpu-intc" compatible string to be present in interrupt-controller of cpu node to enable the use of custom local interrupt source. e.g., cpu0: cpu@0 { compatible = "andestech,ax45mp", "riscv"; ... 
cpu0-intc: interrupt-controller { #interrupt-cells = <0x01>; compatible = "andestech,cpu-intc", "riscv,cpu-intc"; interrupt-controller; }; }; Signed-off-by: Yu Chien Peter Lin Signed-off-by: Thomas Gleixner Reviewed-by: Randolph Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240222083946.3977135-4-peterlin@andestech.com --- drivers/irqchip/irq-riscv-intc.c | 58 +++++++++++++++++++++++++++++++++++----- include/linux/soc/andes/irq.h | 18 +++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) create mode 100644 include/linux/soc/andes/irq.h (limited to 'drivers') diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index 684875c39728..0cd6b48a5dbf 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -17,6 +17,7 @@ #include #include #include +#include static struct irq_domain *intc_domain; static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG; @@ -48,6 +49,31 @@ static void riscv_intc_irq_unmask(struct irq_data *d) csr_set(CSR_IE, BIT(d->hwirq)); } +static void andes_intc_irq_mask(struct irq_data *d) +{ + /* + * Andes specific S-mode local interrupt causes (hwirq) + * are defined as (256 + n) and controlled by n-th bit + * of SLIE. 
+ */ + unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); + + if (d->hwirq < ANDES_SLI_CAUSE_BASE) + csr_clear(CSR_IE, mask); + else + csr_clear(ANDES_CSR_SLIE, mask); +} + +static void andes_intc_irq_unmask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); + + if (d->hwirq < ANDES_SLI_CAUSE_BASE) + csr_set(CSR_IE, mask); + else + csr_set(ANDES_CSR_SLIE, mask); +} + static void riscv_intc_irq_eoi(struct irq_data *d) { /* @@ -71,12 +97,21 @@ static struct irq_chip riscv_intc_chip = { .irq_eoi = riscv_intc_irq_eoi, }; +static struct irq_chip andes_intc_chip = { + .name = "RISC-V INTC", + .irq_mask = andes_intc_irq_mask, + .irq_unmask = andes_intc_irq_unmask, + .irq_eoi = riscv_intc_irq_eoi, +}; + static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { + struct irq_chip *chip = d->host_data; + irq_set_percpu_devid(irq); - irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data, - handle_percpu_devid_irq, NULL, NULL); + irq_domain_set_info(d, irq, hwirq, chip, NULL, handle_percpu_devid_irq, + NULL, NULL); return 0; } @@ -122,11 +157,12 @@ static struct fwnode_handle *riscv_intc_hwnode(void) return intc_domain->fwnode; } -static int __init riscv_intc_init_common(struct fwnode_handle *fn) +static int __init riscv_intc_init_common(struct fwnode_handle *fn, + struct irq_chip *chip) { int rc; - intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, NULL); + intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, chip); if (!intc_domain) { pr_err("unable to add IRQ domain\n"); return -ENXIO; @@ -152,8 +188,9 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) static int __init riscv_intc_init(struct device_node *node, struct device_node *parent) { - int rc; + struct irq_chip *chip = &riscv_intc_chip; unsigned long hartid; + int rc; rc = riscv_of_parent_hartid(node, &hartid); if (rc < 0) { @@ -178,10 +215,17 @@ static int __init riscv_intc_init(struct device_node 
*node, return 0; } - return riscv_intc_init_common(of_node_to_fwnode(node)); + if (of_device_is_compatible(node, "andestech,cpu-intc")) { + riscv_intc_custom_base = ANDES_SLI_CAUSE_BASE; + riscv_intc_custom_nr_irqs = ANDES_RV_IRQ_LAST; + chip = &andes_intc_chip; + } + + return riscv_intc_init_common(of_node_to_fwnode(node), chip); } IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init); +IRQCHIP_DECLARE(andes, "andestech,cpu-intc", riscv_intc_init); #ifdef CONFIG_ACPI @@ -208,7 +252,7 @@ static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header, return -ENOMEM; } - return riscv_intc_init_common(fn); + return riscv_intc_init_common(fn, &riscv_intc_chip); } IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL, diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ -- cgit v1.2.3 From 8ec99b033147ef3bb8f0a560c24eb1baec3bc0be Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:49 +0530 Subject: irqchip/sifive-plic: Convert PLIC driver into a platform driver The PLIC driver does not require very early initialization so convert it into a platform driver. After conversion, the PLIC driver is probed after CPUs are brought-up so setup cpuhp state after context handler of all online CPUs are initialized otherwise PLIC driver crashes for platforms with multiple PLIC instances. 
Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-2-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 101 +++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 5b7bc4fd9517..7400a07fc479 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -64,6 +64,7 @@ #define PLIC_QUIRK_EDGE_INTERRUPT 0 struct plic_priv { + struct device *dev; struct cpumask lmask; struct irq_domain *irqdomain; void __iomem *regs; @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) return 0; } -static int __init __plic_init(struct device_node *node, - struct device_node *parent, - unsigned long plic_quirks) +static const struct of_device_id plic_match[] = { + { .compatible = "sifive,plic-1.0.0" }, + { .compatible = "riscv,plic0" }, + { .compatible = "andestech,nceplic100", + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, + { .compatible = "thead,c900-plic", + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, + {} +}; + +static int plic_probe(struct platform_device *pdev) { int error = 0, nr_contexts, nr_handlers = 0, i; - u32 nr_irqs; - struct plic_priv *priv; + struct device *dev = &pdev->dev; + unsigned long plic_quirks = 0; struct plic_handler *handler; + struct plic_priv *priv; + bool cpuhp_setup; unsigned int cpu; + u32 nr_irqs; + + if (is_of_node(dev->fwnode)) { + const struct of_device_id *id; + + id = of_match_node(plic_match, to_of_node(dev->fwnode)); + if (id) + plic_quirks = (unsigned long)id->data; + } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->dev = dev; priv->plic_quirks = plic_quirks; - priv->regs = of_iomap(node, 0); + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); if (WARN_ON(!priv->regs)) { error = -EIO; goto out_free_priv; } error = -EINVAL; - 
of_property_read_u32(node, "riscv,ndev", &nr_irqs); + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); if (WARN_ON(!nr_irqs)) goto out_iounmap; @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, if (!priv->prio_save) goto out_free_priority_reg; - nr_contexts = of_irq_count(node); + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); if (WARN_ON(!nr_contexts)) goto out_free_priority_reg; error = -ENOMEM; - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, - &plic_irqdomain_ops, priv); + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, + &plic_irqdomain_ops, priv); if (WARN_ON(!priv->irqdomain)) goto out_free_priority_reg; @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, int cpu; unsigned long hartid; - if (of_irq_parse_one(node, i, &parent)) { + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { pr_err("failed to parse parent for context %d.\n", i); continue; } @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, /* Find parent domain and register chained handler */ if (!plic_parent_irq && irq_find_host(parent.np)) { - plic_parent_irq = irq_of_parse_and_map(node, i); + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); if (plic_parent_irq) irq_set_chained_handler(plic_parent_irq, plic_handle_irq); @@ -533,20 +554,29 @@ done: /* * We can have multiple PLIC instances so setup cpuhp state - * and register syscore operations only when context handler - * for current/boot CPU is present. + * and register syscore operations only once after context + * handlers of all online CPUs are initialized. 
*/ - handler = this_cpu_ptr(&plic_handlers); - if (handler->present && !plic_cpuhp_setup_done) { - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, - "irqchip/sifive/plic:starting", - plic_starting_cpu, plic_dying_cpu); - register_syscore_ops(&plic_irq_syscore_ops); - plic_cpuhp_setup_done = true; + if (!plic_cpuhp_setup_done) { + cpuhp_setup = true; + for_each_online_cpu(cpu) { + handler = per_cpu_ptr(&plic_handlers, cpu); + if (!handler->present) { + cpuhp_setup = false; + break; + } + } + if (cpuhp_setup) { + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + "irqchip/sifive/plic:starting", + plic_starting_cpu, plic_dying_cpu); + register_syscore_ops(&plic_irq_syscore_ops); + plic_cpuhp_setup_done = true; + } } - pr_info("%pOFP: mapped %d interrupts with %d handlers for" - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); return 0; out_free_enable_reg: @@ -563,20 +593,11 @@ out_free_priv: return error; } -static int __init plic_init(struct device_node *node, - struct device_node *parent) -{ - return __plic_init(node, parent, 0); -} - -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ - -static int __init plic_edge_init(struct device_node *node, - struct device_node *parent) -{ - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); -} - -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); +static struct platform_driver plic_driver = { + .driver = { + .name = "riscv-plic", + .of_match_table = plic_match, + }, + .probe = plic_probe, +}; +builtin_platform_driver(plic_driver); -- cgit v1.2.3 From 25d862e183d4efeb5e8b9843d783c90aaae4b14a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:50 
+0530 Subject: irqchip/sifive-plic: Use dev_xyz() in-place of pr_xyz() Use dev_info(), dev_warn(), and dev_err() in-place of pr_info(), pr_warn(), and pr_err(). Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-3-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 7400a07fc479..892666f0cc71 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -3,7 +3,6 @@ * Copyright (C) 2017 SiFive * Copyright (C) 2018 Christoph Hellwig */ -#define pr_fmt(fmt) "plic: " fmt #include #include #include @@ -371,9 +370,10 @@ static void plic_handle_irq(struct irq_desc *desc) while ((hwirq = readl(claim))) { int err = generic_handle_domain_irq(handler->priv->irqdomain, hwirq); - if (unlikely(err)) - pr_warn_ratelimited("can't find mapping for hwirq %lu\n", - hwirq); + if (unlikely(err)) { + dev_warn_ratelimited(handler->priv->dev, + "can't find mapping for hwirq %lu\n", hwirq); + } } chained_irq_exit(chip, desc); @@ -401,7 +401,7 @@ static int plic_starting_cpu(unsigned int cpu) enable_percpu_irq(plic_parent_irq, irq_get_trigger_type(plic_parent_irq)); else - pr_warn("cpu%d: parent irq not available\n", cpu); + dev_warn(handler->priv->dev, "cpu%d: parent irq not available\n", cpu); plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD); return 0; @@ -477,7 +477,7 @@ static int plic_probe(struct platform_device *pdev) unsigned long hartid; if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { - pr_err("failed to parse parent for context %d.\n", i); + dev_err(dev, "failed to parse parent for context %d.\n", i); continue; } @@ -500,13 +500,13 @@ static int plic_probe(struct platform_device *pdev) error = riscv_of_parent_hartid(parent.np, &hartid); if (error < 0) { - pr_warn("failed to parse 
hart ID for context %d.\n", i); + dev_warn(dev, "failed to parse hart ID for context %d.\n", i); continue; } cpu = riscv_hartid_to_cpuid(hartid); if (cpu < 0) { - pr_warn("Invalid cpuid for context %d\n", i); + dev_warn(dev, "Invalid cpuid for context %d\n", i); continue; } @@ -525,7 +525,7 @@ static int plic_probe(struct platform_device *pdev) */ handler = per_cpu_ptr(&plic_handlers, cpu); if (handler->present) { - pr_warn("handler already present for context %d.\n", i); + dev_warn(dev, "handler already present for context %d.\n", i); plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD); goto done; } @@ -575,8 +575,8 @@ done: } } - pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", - to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); + dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n", + nr_irqs, nr_handlers, nr_contexts); return 0; out_free_enable_reg: -- cgit v1.2.3 From b68d0ff529a939a118ec52f271be8cad5d99e79a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:51 +0530 Subject: irqchip/sifive-plic: Use devm_xyz() for managed allocation Use devm_xyz() for allocations and mappings managed by the Linux device driver framework. 
Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-4-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 49 +++++++++++++-------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 892666f0cc71..299feefa4207 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -436,39 +436,30 @@ static int plic_probe(struct platform_device *pdev) plic_quirks = (unsigned long)id->data; } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->dev = dev; priv->plic_quirks = plic_quirks; - priv->regs = of_iomap(to_of_node(dev->fwnode), 0); - if (WARN_ON(!priv->regs)) { - error = -EIO; - goto out_free_priv; - } + priv->regs = devm_platform_ioremap_resource(pdev, 0); + if (WARN_ON(!priv->regs)) + return -EIO; - error = -EINVAL; of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); if (WARN_ON(!nr_irqs)) - goto out_iounmap; + return -EINVAL; priv->nr_irqs = nr_irqs; - priv->prio_save = bitmap_alloc(nr_irqs, GFP_KERNEL); + priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL); if (!priv->prio_save) - goto out_free_priority_reg; + return -ENOMEM; nr_contexts = of_irq_count(to_of_node(dev->fwnode)); if (WARN_ON(!nr_contexts)) - goto out_free_priority_reg; - - error = -ENOMEM; - priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, - &plic_irqdomain_ops, priv); - if (WARN_ON(!priv->irqdomain)) - goto out_free_priority_reg; + return -EINVAL; for (i = 0; i < nr_contexts; i++) { struct of_phandle_args parent; @@ -539,10 +530,10 @@ static int plic_probe(struct platform_device *pdev) i * CONTEXT_ENABLE_SIZE; handler->priv = priv; - handler->enable_save = kcalloc(DIV_ROUND_UP(nr_irqs, 32), - sizeof(*handler->enable_save), GFP_KERNEL); + 
handler->enable_save = devm_kcalloc(dev, DIV_ROUND_UP(nr_irqs, 32), + sizeof(*handler->enable_save), GFP_KERNEL); if (!handler->enable_save) - goto out_free_enable_reg; + return -ENOMEM; done: for (hwirq = 1; hwirq <= nr_irqs; hwirq++) { plic_toggle(handler, hwirq, 0); @@ -552,6 +543,11 @@ done: nr_handlers++; } + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, + &plic_irqdomain_ops, priv); + if (WARN_ON(!priv->irqdomain)) + return -ENOMEM; + /* * We can have multiple PLIC instances so setup cpuhp state * and register syscore operations only once after context @@ -578,19 +574,6 @@ done: dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n", nr_irqs, nr_handlers, nr_contexts); return 0; - -out_free_enable_reg: - for_each_cpu(cpu, cpu_present_mask) { - handler = per_cpu_ptr(&plic_handlers, cpu); - kfree(handler->enable_save); - } -out_free_priority_reg: - kfree(priv->prio_save); -out_iounmap: - iounmap(priv->regs); -out_free_priv: - kfree(priv); - return error; } static struct platform_driver plic_driver = { -- cgit v1.2.3 From 6c725f33d67b53f2d302c2c4509deae953fc6ade Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:52 +0530 Subject: irqchip/sifive-plic: Use riscv_get_intc_hwnode() to get parent fwnode The RISC-V INTC irqdomain is always the parent irqdomain of SiFive PLIC so use riscv_get_intc_hwnode() to get the parent fwnode similar to other RISC-V drivers which use local interrupts. 
Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-5-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 299feefa4207..208fad76f560 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -423,6 +423,7 @@ static int plic_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; unsigned long plic_quirks = 0; struct plic_handler *handler; + struct irq_domain *domain; struct plic_priv *priv; bool cpuhp_setup; unsigned int cpu; @@ -502,11 +503,11 @@ static int plic_probe(struct platform_device *pdev) } /* Find parent domain and register chained handler */ - if (!plic_parent_irq && irq_find_host(parent.np)) { - plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); + if (!plic_parent_irq && domain) { + plic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT); if (plic_parent_irq) - irq_set_chained_handler(plic_parent_irq, - plic_handle_irq); + irq_set_chained_handler(plic_parent_irq, plic_handle_irq); } /* -- cgit v1.2.3 From a15587277a246c388c83b1cd9cf7c1a868cd752f Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:53 +0530 Subject: irqchip/sifive-plic: Cleanup PLIC contexts upon irqdomain creation failure The SiFive PLIC contexts should not be left dangling if irqdomain creation fails because plic_starting_cpu() can crash accessing unmapped registers. 
Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-6-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 73 ++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 208fad76f560..a399cb3f44af 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -417,17 +417,45 @@ static const struct of_device_id plic_match[] = { {} }; +static int plic_parse_context_parent(struct platform_device *pdev, u32 context, + u32 *parent_hwirq, int *parent_cpu) +{ + struct device *dev = &pdev->dev; + struct of_phandle_args parent; + unsigned long hartid; + int rc; + + /* + * Currently, only OF fwnode is supported so extend this + * function for ACPI support. + */ + if (!is_of_node(dev->fwnode)) + return -EINVAL; + + rc = of_irq_parse_one(to_of_node(dev->fwnode), context, &parent); + if (rc) + return rc; + + rc = riscv_of_parent_hartid(parent.np, &hartid); + if (rc) + return rc; + + *parent_hwirq = parent.args[0]; + *parent_cpu = riscv_hartid_to_cpuid(hartid); + return 0; +} + static int plic_probe(struct platform_device *pdev) { - int error = 0, nr_contexts, nr_handlers = 0, i; + int error = 0, nr_contexts, nr_handlers = 0, cpu, i; struct device *dev = &pdev->dev; unsigned long plic_quirks = 0; struct plic_handler *handler; + u32 nr_irqs, parent_hwirq; struct irq_domain *domain; struct plic_priv *priv; + irq_hw_number_t hwirq; bool cpuhp_setup; - unsigned int cpu; - u32 nr_irqs; if (is_of_node(dev->fwnode)) { const struct of_device_id *id; @@ -463,13 +491,9 @@ static int plic_probe(struct platform_device *pdev) return -EINVAL; for (i = 0; i < nr_contexts; i++) { - struct of_phandle_args parent; - irq_hw_number_t hwirq; - int cpu; - unsigned long hartid; - - if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { - dev_err(dev, "failed 
to parse parent for context %d.\n", i); + error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu); + if (error) { + dev_warn(dev, "hwirq for context%d not found\n", i); continue; } @@ -477,7 +501,7 @@ static int plic_probe(struct platform_device *pdev) * Skip contexts other than external interrupts for our * privilege level. */ - if (parent.args[0] != RV_IRQ_EXT) { + if (parent_hwirq != RV_IRQ_EXT) { /* Disable S-mode enable bits if running in M-mode. */ if (IS_ENABLED(CONFIG_RISCV_M_MODE)) { void __iomem *enable_base = priv->regs + @@ -490,13 +514,6 @@ static int plic_probe(struct platform_device *pdev) continue; } - error = riscv_of_parent_hartid(parent.np, &hartid); - if (error < 0) { - dev_warn(dev, "failed to parse hart ID for context %d.\n", i); - continue; - } - - cpu = riscv_hartid_to_cpuid(hartid); if (cpu < 0) { dev_warn(dev, "Invalid cpuid for context %d\n", i); continue; @@ -534,7 +551,7 @@ static int plic_probe(struct platform_device *pdev) handler->enable_save = devm_kcalloc(dev, DIV_ROUND_UP(nr_irqs, 32), sizeof(*handler->enable_save), GFP_KERNEL); if (!handler->enable_save) - return -ENOMEM; + goto fail_cleanup_contexts; done: for (hwirq = 1; hwirq <= nr_irqs; hwirq++) { plic_toggle(handler, hwirq, 0); @@ -547,7 +564,7 @@ done: priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, &plic_irqdomain_ops, priv); if (WARN_ON(!priv->irqdomain)) - return -ENOMEM; + goto fail_cleanup_contexts; /* * We can have multiple PLIC instances so setup cpuhp state @@ -575,6 +592,22 @@ done: dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n", nr_irqs, nr_handlers, nr_contexts); return 0; + +fail_cleanup_contexts: + for (i = 0; i < nr_contexts; i++) { + if (plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu)) + continue; + if (parent_hwirq != RV_IRQ_EXT || cpu < 0) + continue; + + handler = per_cpu_ptr(&plic_handlers, cpu); + handler->present = false; + handler->hart_base = NULL; + handler->enable_base = NULL; + 
handler->enable_save = NULL; + handler->priv = NULL; + } + return -ENOMEM; } static struct platform_driver plic_driver = { -- cgit v1.2.3 From 95652106478030f54620b1f0d28f78ab110b3212 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:54 +0530 Subject: irqchip/sifive-plic: Parse number of interrupts and contexts early in plic_probe() The SiFive PLIC driver needs to know the number of interrupts and contexts to complete initialization. Parse these details early in plic_probe() to avoid unnecessary memory allocations and register mappings if these details are not available. Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-7-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 43 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index a399cb3f44af..474ddc33a54a 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -417,6 +417,34 @@ static const struct of_device_id plic_match[] = { {} }; +static int plic_parse_nr_irqs_and_contexts(struct platform_device *pdev, + u32 *nr_irqs, u32 *nr_contexts) +{ + struct device *dev = &pdev->dev; + int rc; + + /* + * Currently, only OF fwnode is supported so extend this + * function for ACPI support. 
+ */ + if (!is_of_node(dev->fwnode)) + return -EINVAL; + + rc = of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", nr_irqs); + if (rc) { + dev_err(dev, "riscv,ndev property not available\n"); + return rc; + } + + *nr_contexts = of_irq_count(to_of_node(dev->fwnode)); + if (WARN_ON(!(*nr_contexts))) { + dev_err(dev, "no PLIC context available\n"); + return -EINVAL; + } + + return 0; +} + static int plic_parse_context_parent(struct platform_device *pdev, u32 context, u32 *parent_hwirq, int *parent_cpu) { @@ -465,31 +493,26 @@ static int plic_probe(struct platform_device *pdev) plic_quirks = (unsigned long)id->data; } + error = plic_parse_nr_irqs_and_contexts(pdev, &nr_irqs, &nr_contexts); + if (error) + return error; + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->dev = dev; priv->plic_quirks = plic_quirks; + priv->nr_irqs = nr_irqs; priv->regs = devm_platform_ioremap_resource(pdev, 0); if (WARN_ON(!priv->regs)) return -EIO; - of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); - if (WARN_ON(!nr_irqs)) - return -EINVAL; - - priv->nr_irqs = nr_irqs; - priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL); if (!priv->prio_save) return -ENOMEM; - nr_contexts = of_irq_count(to_of_node(dev->fwnode)); - if (WARN_ON(!nr_contexts)) - return -EINVAL; - for (i = 0; i < nr_contexts; i++) { error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu); if (error) { -- cgit v1.2.3 From abb7205794900503d6358ef1fb645373753a794d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:55 +0530 Subject: irqchip/sifive-plic: Improve locking safety by using irqsave/irqrestore Now that PLIC driver is probed as a regular platform driver, the lock dependency validator complains about the safety of handler->enable_lock usage: [ 0.956775] Possible interrupt unsafe locking scenario: [ 0.956998] CPU0 CPU1 [ 0.957247] ---- ---- [ 0.957439] lock(&handler->enable_lock); [ 0.957607] local_irq_disable(); [ 
0.957793] lock(&irq_desc_lock_class); [ 0.958021] lock(&handler->enable_lock); [ 0.958246] [ 0.958342] lock(&irq_desc_lock_class); [ 0.958501] *** DEADLOCK *** To address above, use raw_spin_lock_irqsave/unlock_irqrestore() instead of raw_spin_lock/unlock(). Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-8-apatel@ventanamicro.com --- drivers/irqchip/irq-sifive-plic.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 474ddc33a54a..601000d2a351 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -103,9 +103,11 @@ static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable) static void plic_toggle(struct plic_handler *handler, int hwirq, int enable) { - raw_spin_lock(&handler->enable_lock); + unsigned long flags; + + raw_spin_lock_irqsave(&handler->enable_lock, flags); __plic_toggle(handler->enable_base, hwirq, enable); - raw_spin_unlock(&handler->enable_lock); + raw_spin_unlock_irqrestore(&handler->enable_lock, flags); } static inline void plic_irq_toggle(const struct cpumask *mask, @@ -236,6 +238,7 @@ static int plic_irq_set_type(struct irq_data *d, unsigned int type) static int plic_irq_suspend(void) { unsigned int i, cpu; + unsigned long flags; u32 __iomem *reg; struct plic_priv *priv; @@ -253,12 +256,12 @@ static int plic_irq_suspend(void) if (!handler->present) continue; - raw_spin_lock(&handler->enable_lock); + raw_spin_lock_irqsave(&handler->enable_lock, flags); for (i = 0; i < DIV_ROUND_UP(priv->nr_irqs, 32); i++) { reg = handler->enable_base + i * sizeof(u32); handler->enable_save[i] = readl(reg); } - raw_spin_unlock(&handler->enable_lock); + raw_spin_unlock_irqrestore(&handler->enable_lock, flags); } return 0; @@ -267,6 +270,7 @@ static int plic_irq_suspend(void) static void plic_irq_resume(void) { unsigned int 
i, index, cpu; + unsigned long flags; u32 __iomem *reg; struct plic_priv *priv; @@ -284,12 +288,12 @@ static void plic_irq_resume(void) if (!handler->present) continue; - raw_spin_lock(&handler->enable_lock); + raw_spin_lock_irqsave(&handler->enable_lock, flags); for (i = 0; i < DIV_ROUND_UP(priv->nr_irqs, 32); i++) { reg = handler->enable_base + i * sizeof(u32); writel(handler->enable_save[i], reg); } - raw_spin_unlock(&handler->enable_lock); + raw_spin_unlock_irqrestore(&handler->enable_lock, flags); } } -- cgit v1.2.3 From 3c46fc5b5507be1f4aa144a1fbd83b0ccba04cc6 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 22 Feb 2024 15:09:56 +0530 Subject: irqchip/riscv-intc: Add support for RISC-V AIA The RISC-V advanced interrupt architecture (AIA) extends the per-HART local interrupts in following ways: 1. Minimum 64 local interrupts for both RV32 and RV64 2. Ability to process multiple pending local interrupts in same interrupt handler 3. Priority configuration for each local interrupts 4. Special CSRs to configure/access the per-HART MSI controller Add support for #1 and #2 described above in the RISC-V intc driver. 
Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240222094006.1030709-9-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-intc.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index 0cd6b48a5dbf..cccb65339982 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -19,6 +19,8 @@ #include #include +#include + static struct irq_domain *intc_domain; static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG; static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG; @@ -32,6 +34,14 @@ static asmlinkage void riscv_intc_irq(struct pt_regs *regs) pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause); } +static asmlinkage void riscv_intc_aia_irq(struct pt_regs *regs) +{ + unsigned long topi; + + while ((topi = csr_read(CSR_TOPI))) + generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT); +} + /* * On RISC-V systems local interrupts are masked or unmasked by writing * the SIE (Supervisor Interrupt Enable) CSR. 
As CSRs can only be written @@ -41,12 +51,18 @@ static asmlinkage void riscv_intc_irq(struct pt_regs *regs) static void riscv_intc_irq_mask(struct irq_data *d) { - csr_clear(CSR_IE, BIT(d->hwirq)); + if (IS_ENABLED(CONFIG_32BIT) && d->hwirq >= BITS_PER_LONG) + csr_clear(CSR_IEH, BIT(d->hwirq - BITS_PER_LONG)); + else + csr_clear(CSR_IE, BIT(d->hwirq)); } static void riscv_intc_irq_unmask(struct irq_data *d) { - csr_set(CSR_IE, BIT(d->hwirq)); + if (IS_ENABLED(CONFIG_32BIT) && d->hwirq >= BITS_PER_LONG) + csr_set(CSR_IEH, BIT(d->hwirq - BITS_PER_LONG)); + else + csr_set(CSR_IE, BIT(d->hwirq)); } static void andes_intc_irq_mask(struct irq_data *d) @@ -157,8 +173,7 @@ static struct fwnode_handle *riscv_intc_hwnode(void) return intc_domain->fwnode; } -static int __init riscv_intc_init_common(struct fwnode_handle *fn, - struct irq_chip *chip) +static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_chip *chip) { int rc; @@ -176,11 +191,10 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn, riscv_set_intc_hwnode_fn(riscv_intc_hwnode); - pr_info("%d local interrupts mapped\n", riscv_intc_nr_irqs); - if (riscv_intc_custom_nr_irqs) { - pr_info("%d custom local interrupts mapped\n", - riscv_intc_custom_nr_irqs); - } + pr_info("%d local interrupts mapped\n", + riscv_isa_extension_available(NULL, SxAIA) ? 64 : riscv_intc_nr_irqs); + if (riscv_intc_custom_nr_irqs) + pr_info("%d custom local interrupts mapped\n", riscv_intc_custom_nr_irqs); return 0; } -- cgit v1.2.3 From 9ee485bdda68d6d3f5728cbe3150eb9013d7d22b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:23 +0100 Subject: drm/bridge: aux-hpd: fix OF node leaks The two device node references taken during allocation need to be dropped when the auxiliary device is freed. 
Fixes: 6914968a0b52 ("drm/bridge: properly refcount DT nodes in aux bridge drivers") Cc: Dmitry Baryshkov Cc: Neil Armstrong Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240217150228.5788-2-johan+linaro@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-2-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index bb55f697a181..9e71daf95bde 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -25,6 +25,7 @@ static void drm_aux_hpd_bridge_release(struct device *dev) ida_free(&drm_aux_hpd_bridge_ida, adev->id); of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); kfree(adev); } @@ -74,6 +75,8 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); ida_free(&drm_aux_hpd_bridge_ida, adev->id); kfree(adev); return ERR_PTR(ret); -- cgit v1.2.3 From 734f06db599f66d6a159c78abfdbadfea3b7d43b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 21 Feb 2024 00:34:42 +0200 Subject: net: dpaa: fman_memac: accept phy-interface-type = "10gbase-r" in the device tree Since commit 5d93cfcf7360 ("net: dpaa: Convert to phylink"), we support the "10gbase-r" phy-mode through a driver-based conversion of "xgmii", but we still don't actually support it when the device tree specifies "10gbase-r" proper. 
This is because boards such as LS1046A-RDB do not define pcs-handle-names (for whatever reason) in the ethernet@f0000 device tree node, and the code enters through this code path: err = of_property_match_string(mac_node, "pcs-handle-names", "xfi"); // code takes neither branch and falls through if (err >= 0) { (...) } else if (err != -EINVAL && err != -ENODATA) { goto _return_fm_mac_free; } (...) /* For compatibility, if pcs-handle-names is missing, we assume this * phy is the first one in pcsphy-handle */ err = of_property_match_string(mac_node, "pcs-handle-names", "sgmii"); if (err == -EINVAL || err == -ENODATA) pcs = memac_pcs_create(mac_node, 0); // code takes this branch else if (err < 0) goto _return_fm_mac_free; else pcs = memac_pcs_create(mac_node, err); // A default PCS is created and saved in "pcs" // This determination fails and mistakenly saves the default PCS // memac->sgmii_pcs instead of memac->xfi_pcs, because at this // stage, mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER. if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) memac->xfi_pcs = pcs; else memac->sgmii_pcs = pcs; In other words, in the absence of pcs-handle-names, the default xfi_pcs assignment logic only works when in the device tree we have PHY_INTERFACE_MODE_XGMII. By reversing the order between the fallback xfi_pcs assignment and the "xgmii" overwrite with "10gbase-r", we are able to support both values in the device tree, with identical behavior. Currently, it is impossible to make the s/xgmii/10gbase-r/ device tree conversion, because it would break forward compatibility (new device tree with old kernel). The only way to modify existing device trees to phy-interface-mode = "10gbase-r" is to fix stable kernels to accept this value and handle it properly. One reason why the conversion is desirable is because with pre-phylink kernels, the Aquantia PHY driver used to warn about the improper use of PHY_INTERFACE_MODE_XGMII [1]. 
It is best to have a single (latest) device tree that works with all supported stable kernel versions. Note that the blamed commit does not constitute a regression per se. Older stable kernels like 6.1 still do not work with "10gbase-r", but for a different reason. That is a battle for another time. [1] https://lore.kernel.org/netdev/20240214-ls1046-dts-use-10gbase-r-v1-1-8c2d68547393@concurrent-rt.com/ Fixes: 5d93cfcf7360 ("net: dpaa: Convert to phylink") Signed-off-by: Vladimir Oltean Reviewed-by: Sean Anderson Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_memac.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 9ba15d3183d7..758535adc9ff 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1073,6 +1073,14 @@ int memac_initialization(struct mac_device *mac_dev, unsigned long capabilities; unsigned long *supported; + /* The internal connection to the serdes is XGMII, but this isn't + * really correct for the phy mode (which is the external connection). + * However, this is how all older device trees say that they want + * 10GBASE-R (aka XFI), so just convert it for them. + */ + if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; + mac_dev->phylink_ops = &memac_mac_ops; mac_dev->set_promisc = memac_set_promiscuous; mac_dev->change_addr = memac_modify_mac_address; @@ -1139,7 +1147,7 @@ int memac_initialization(struct mac_device *mac_dev, * (and therefore that xfi_pcs cannot be set). If we are defaulting to * XGMII, assume this is for XFI. Otherwise, assume it is for SGMII. 
*/ - if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER) memac->xfi_pcs = pcs; else memac->sgmii_pcs = pcs; @@ -1153,14 +1161,6 @@ int memac_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } - /* The internal connection to the serdes is XGMII, but this isn't - * really correct for the phy mode (which is the external connection). - * However, this is how all older device trees say that they want - * 10GBASE-R (aka XFI), so just convert it for them. - */ - if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) - mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; - /* TODO: The following interface modes are supported by (some) hardware * but not by this driver: * - 1000BASE-KX -- cgit v1.2.3 From b0b1210bc150fbd741b4b9fce8a24541306b40fc Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 21 Feb 2024 11:27:29 +0900 Subject: ps3/gelic: Fix SKB allocation Commit 3ce4f9c3fbb3 ("net/ps3_gelic_net: Add gelic_descr structures") of 6.8-rc1 had a copy-and-paste error where the pointer that holds the allocated SKB (struct gelic_descr.skb) was set to NULL after the SKB was allocated. This resulted in a kernel panic when the SKB pointer was accessed. This fix moves the initialization of the gelic_descr to before the SKB is allocated. Reported-by: sambat goson Fixes: 3ce4f9c3fbb3 ("net/ps3_gelic_net: Add gelic_descr structures") Signed-off-by: Geoff Levand Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index d5b75af163d3..c1b0d35c8d05 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -384,18 +384,18 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) dev_info(ctodev(card), "%s: ERROR status\n", __func__); - descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); - if (!descr->skb) { - descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ - return -ENOMEM; - } descr->hw_regs.dmac_cmd_status = 0; descr->hw_regs.result_size = 0; descr->hw_regs.valid_size = 0; descr->hw_regs.data_error = 0; descr->hw_regs.payload.dev_addr = 0; descr->hw_regs.payload.size = 0; - descr->skb = NULL; + + descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); + if (!descr->skb) { + descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ + return -ENOMEM; + } offset = ((unsigned long)descr->skb->data) & (GELIC_NET_RXBUF_ALIGN - 1); -- cgit v1.2.3 From e5ca263508f7e9d2cf711edf3258d11ca087885c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:24 +0100 Subject: drm/bridge: aux-hpd: separate allocation and registration Combining allocation and registration is an anti-pattern that should be avoided. Add two new functions for allocating and registering a dp-hpd bridge with a proper 'devm' prefix so that it is clear that these are device managed interfaces. devm_drm_dp_hpd_bridge_alloc() devm_drm_dp_hpd_bridge_add() The new interface will be used to fix a use-after-free bug in the Qualcomm PMIC GLINK driver and may prevent similar issues from being introduced elsewhere.
The existing drm_dp_hpd_bridge_register() is reimplemented using the above and left in place for now. Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-3-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 67 +++++++++++++++++++++++++-------- include/drm/bridge/aux-bridge.h | 15 ++++++++ 2 files changed, 67 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 9e71daf95bde..6886db2d9e00 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -30,16 +30,13 @@ static void drm_aux_hpd_bridge_release(struct device *dev) kfree(adev); } -static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +static void drm_aux_hpd_bridge_free_adev(void *_adev) { - struct auxiliary_device *adev = _adev; - - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); + auxiliary_device_uninit(_adev); } /** - * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge * @parent: device instance providing this bridge * @np: device node pointer corresponding to this bridge instance * @@ -47,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev) * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is * able to send the HPD events. * - * Return: device instance that will handle created bridge or an error code - * encoded into the pointer. 
+ * Return: bridge auxiliary device pointer or an error pointer */ -struct device *drm_dp_hpd_bridge_register(struct device *parent, - struct device_node *np) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np) { struct auxiliary_device *adev; int ret; @@ -82,13 +77,55 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, return ERR_PTR(ret); } - ret = auxiliary_device_add(adev); - if (ret) { - auxiliary_device_uninit(adev); + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev); + if (ret) return ERR_PTR(ret); - } - ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + return adev; +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc); + +static void drm_aux_hpd_bridge_del_adev(void *_adev) +{ + auxiliary_device_delete(_adev); +} + +/** + * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge + * @dev: struct device to tie registration lifetime to + * @adev: bridge auxiliary device to be registered + * + * Returns: zero on success or a negative errno + */ +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev) +{ + int ret; + + ret = auxiliary_device_add(adev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev); +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add); + +/** + * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Return: device instance that will handle created bridge or an error pointer + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = devm_drm_dp_hpd_bridge_alloc(parent, np); + if (IS_ERR(adev)) + return ERR_CAST(adev); + + ret = devm_drm_dp_hpd_bridge_add(parent, adev); if (ret) return ERR_PTR(ret); diff 
--git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index c4c423e97f06..4453906105ca 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -9,6 +9,8 @@ #include +struct auxiliary_device; + #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) int drm_aux_bridge_register(struct device *parent); #else @@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent) #endif #if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np); +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev); struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np); void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status); #else +static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, + struct device_node *np) +{ + return NULL; +} + +static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev) +{ + return 0; +} + static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) { -- cgit v1.2.3 From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:25 +0100 Subject: soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free A recent DRM series purporting to simplify support for "transparent bridges" and handling of probe deferrals ironically exposed a use-after-free issue on pmic_glink_altmode probe deferral. This has manifested itself as the display subsystem occasionally failing to initialise and NULL-pointer dereferences during boot of machines like the Lenovo ThinkPad X13s. Specifically, the dp-hpd bridge is currently registered before all resources have been acquired which means that it can also be deregistered on probe deferrals. 
In the meantime there is a race window where the new aux bridge driver (or PHY driver previously) may have looked up the dp-hpd bridge and stored a (non-reference-counted) pointer to the bridge which is about to be deallocated. When the display controller is later initialised, this triggers a use-after-free when attaching the bridges: dp -> aux -> dp-hpd (freed) which may, for example, result in the freed bridge failing to attach: [drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16 or a NULL-pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Call trace: drm_bridge_attach+0x70/0x1a8 [drm] drm_aux_bridge_attach+0x24/0x38 [aux_bridge] drm_bridge_attach+0x80/0x1a8 [drm] dp_bridge_init+0xa8/0x15c [msm] msm_dp_modeset_init+0x28/0xc4 [msm] The DRM bridge implementation is clearly fragile and implicitly built on the assumption that bridges may never go away. In this case, the fix is to move the bridge registration in the pmic_glink_altmode driver to after all resources have been looked up. Incidentally, with the new dp-hpd bridge implementation, which registers child devices, this is also a requirement due to a long-standing issue in driver core that can otherwise lead to a probe deferral loop (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). 
[DB: slightly fixed commit message by adding the word 'commit'] Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support") Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE") Cc: stable@vger.kernel.org # 6.3 Cc: Bjorn Andersson Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink_altmode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 5fcd0fdd2faa..b3808fc24c69 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -76,7 +76,7 @@ struct pmic_glink_altmode_port { struct work_struct work; - struct device *bridge; + struct auxiliary_device *bridge; enum typec_orientation orientation; u16 svid; @@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work) else pmic_glink_altmode_enable_usb(altmode, alt_port); - drm_aux_hpd_bridge_notify(alt_port->bridge, + drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, alt_port->hpd_state ?
connector_status_connected : connector_status_disconnected); @@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, alt_port->index = port; INIT_WORK(&alt_port->work, pmic_glink_altmode_worker); - alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode)); + alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode)); if (IS_ERR(alt_port->bridge)) { fwnode_handle_put(fwnode); return PTR_ERR(alt_port->bridge); @@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, } } + for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) { + alt_port = &altmode->ports[port]; + if (!alt_port->bridge) + continue; + + ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge); + if (ret) + return ret; + } + altmode->client = devm_pmic_glink_register_client(dev, altmode->owner_id, pmic_glink_altmode_callback, -- cgit v1.2.3 From f79ee78767ca60e7a2c89eacd2dbdf237d97e838 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 17 Feb 2024 16:02:26 +0100 Subject: soc: qcom: pmic_glink: Fix boot when QRTR=m We need to bail out before adding/removing devices if we are going to -EPROBE_DEFER. Otherwise boot can get stuck in a probe deferral loop due to a long-standing issue in driver core (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). Deregistering the altmode child device can potentially also trigger bugs in the DRM bridge implementation, which does not expect bridges to go away. 
[DB: slightly fixed commit message by adding the word 'commit'] Suggested-by: Dmitry Baryshkov Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20231213210644.8702-1-robdclark@gmail.com [ johan: rebase on 6.8-rc4, amend commit message and mention DRM ] Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: stable@vger.kernel.org # 6.3 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-5-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index f4bfd24386f1..f913e9bd57ed 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev) pg->client_mask = *match_data; + pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); + if (IS_ERR(pg->pdr)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), + "failed to initialize pdr\n"); + return ret; + } + if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) { ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi"); if (ret) - return ret; + goto out_release_pdr_handle; } if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) { ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode"); @@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev) goto out_release_altmode_aux; } - pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); - if (IS_ERR(pg->pdr)) { - ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n"); - goto out_release_aux_devices; - } - service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd"); if (IS_ERR(service)) { ret = dev_err_probe(&pdev->dev, PTR_ERR(service), "failed
adding pdr lookup for charger_pd\n"); - goto out_release_pdr_handle; + goto out_release_aux_devices; } mutex_lock(&__pmic_glink_lock); @@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev) return 0; -out_release_pdr_handle: - pdr_handle_release(pg->pdr); out_release_aux_devices: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT)) pmic_glink_del_aux_device(pg, &pg->ps_aux); @@ -311,6 +310,8 @@ out_release_altmode_aux: out_release_ucsi_aux: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) pmic_glink_del_aux_device(pg, &pg->ucsi_aux); +out_release_pdr_handle: + pdr_handle_release(pg->pdr); return ret; } -- cgit v1.2.3 From 2a770cdc4382b457ca3d43d03f0f0064f905a0d0 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 20 Feb 2024 11:12:07 +0800 Subject: tun: Fix xdp_rxq_info's queue_index when detaching When a queue(tfile) is detached, we only update tfile's queue_index, but do not update xdp_rxq_info's queue_index. This patch fixes it. Fixes: 8bf5c4ee1889 ("tun: setup xdp_rxq_info") Signed-off-by: Yunjian Wang Link: https://lore.kernel.org/r/1708398727-46308-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4a4f8c8e79fa..8f95a562b8d0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); -- cgit v1.2.3 From f0a0fc10abb062d122db5ac4ed42f6d1ca342649 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sat, 17 Feb 2024 13:30:10 -0800 Subject: cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back There is a loophole in pstate limit clamping for the intel_cpufreq CPU frequency scaling 
driver (intel_pstate in passive mode), schedutil CPU frequency scaling governor, HWP (HardWare Pstate) control enabled, when the adjust_perf call back path is used. Fix it. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ca94e60e705a..79619227ea51 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2987,6 +2987,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; -- cgit v1.2.3 From 60caa8b33bd682a9ed99d1fc3f91d74e1acc9922 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Mon, 19 Feb 2024 15:48:25 +0000 Subject: iio: imu: inv_mpu6050: fix FIFO parsing when empty Now that we are reading the full FIFO in the interrupt handler, it is possible to have an empty FIFO since we are still receiving 1 interrupt per data. Handle this case correctly instead of having an error causing a reset of the FIFO.
Fixes: 0829edc43e0a ("iio: imu: inv_mpu6050: read the full fifo when processing data") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240219154825.90656-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 66d4ba088e70..d4f9b5d8d28d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -109,6 +109,8 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) /* compute and process only all complete datum */ nb = fifo_count / bytes_per_datum; fifo_count = nb * bytes_per_datum; + if (nb == 0) + goto end_session; /* Each FIFO data contains all sensors, so same number for FIFO and sensor data */ fifo_period = NSEC_PER_SEC / INV_MPU6050_DIVIDER_TO_FIFO_RATE(st->chip_config.divider); inv_sensors_timestamp_interrupt(&st->timestamp, fifo_period, nb, nb, pf->timestamp); -- cgit v1.2.3 From a9dd9ba323114f366eb07f1d9630822f8df6cbb2 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Mon, 19 Feb 2024 20:13:59 +0100 Subject: iio: pressure: Fixes BMP38x and BMP390 SPI support According to the datasheet of BMP38x and BMP390 devices, for an SPI read operation the first byte that is returned needs to be dropped, and the rest of the bytes are the actual data returned from the sensor. 
Reviewed-by: Andy Shevchenko Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family") Signed-off-by: Vasileios Amoiridis Acked-by: Angel Iglesias Link: https://lore.kernel.org/r/20240219191359.18367-1-vassilisamir@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-spi.c | 50 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index e8a5fed07e88..a444d4b2978b 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -4,6 +4,7 @@ * * Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c */ +#include #include #include #include @@ -35,6 +36,34 @@ static int bmp280_regmap_spi_read(void *context, const void *reg, return spi_write_then_read(spi, reg, reg_size, val, val_size); } +static int bmp380_regmap_spi_read(void *context, const void *reg, + size_t reg_size, void *val, size_t val_size) +{ + struct spi_device *spi = to_spi_device(context); + u8 rx_buf[4]; + ssize_t status; + + /* + * Maximum number of consecutive bytes read for a temperature or + * pressure measurement is 3. + */ + if (val_size > 3) + return -EINVAL; + + /* + * According to the BMP3xx datasheets, for a basic SPI read operation, + * the first byte needs to be dropped and the rest are the requested + * data.
+ */ + status = spi_write_then_read(spi, reg, 1, rx_buf, val_size + 1); + if (status) + return status; + + memcpy(val, rx_buf + 1, val_size); + + return 0; +} + static struct regmap_bus bmp280_regmap_bus = { .write = bmp280_regmap_spi_write, .read = bmp280_regmap_spi_read, @@ -42,10 +71,19 @@ static struct regmap_bus bmp280_regmap_bus = { .val_format_endian_default = REGMAP_ENDIAN_BIG, }; +static struct regmap_bus bmp380_regmap_bus = { + .write = bmp280_regmap_spi_write, + .read = bmp380_regmap_spi_read, + .read_flag_mask = BIT(7), + .reg_format_endian_default = REGMAP_ENDIAN_BIG, + .val_format_endian_default = REGMAP_ENDIAN_BIG, +}; + static int bmp280_spi_probe(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); const struct bmp280_chip_info *chip_info; + struct regmap_bus *bmp_regmap_bus; struct regmap *regmap; int ret; @@ -58,8 +96,18 @@ static int bmp280_spi_probe(struct spi_device *spi) chip_info = spi_get_device_match_data(spi); + switch (chip_info->chip_id[0]) { + case BMP380_CHIP_ID: + case BMP390_CHIP_ID: + bmp_regmap_bus = &bmp380_regmap_bus; + break; + default: + bmp_regmap_bus = &bmp280_regmap_bus; + break; + } + regmap = devm_regmap_init(&spi->dev, - &bmp280_regmap_bus, + bmp_regmap_bus, &spi->dev, chip_info->regmap_config); if (IS_ERR(regmap)) { -- cgit v1.2.3 From a28d893eb3270cf62c10dd8777af0d8452cdc072 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:21 +0100 Subject: md: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-4-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/md/dm.c | 23 +++++++++++++---------- drivers/md/md.c | 12 ++++++------ drivers/md/md.h | 2 +- include/linux/device-mapper.h | 2 +- 4 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8dcabf84d866..87de5b5682ad 100644 --- 
a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -726,7 +726,8 @@ static struct table_device *open_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode) { struct table_device *td; - struct bdev_handle *bdev_handle; + struct file *bdev_file; + struct block_device *bdev; u64 part_off; int r; @@ -735,34 +736,36 @@ static struct table_device *open_table_device(struct mapped_device *md, return ERR_PTR(-ENOMEM); refcount_set(&td->count, 1); - bdev_handle = bdev_open_by_dev(dev, mode, _dm_claim_ptr, NULL); - if (IS_ERR(bdev_handle)) { - r = PTR_ERR(bdev_handle); + bdev_file = bdev_file_open_by_dev(dev, mode, _dm_claim_ptr, NULL); + if (IS_ERR(bdev_file)) { + r = PTR_ERR(bdev_file); goto out_free_td; } + bdev = file_bdev(bdev_file); + /* * We can be called before the dm disk is added. In that case we can't * register the holder relation here. It will be done once add_disk was * called. */ if (md->disk->slave_dir) { - r = bd_link_disk_holder(bdev_handle->bdev, md->disk); + r = bd_link_disk_holder(bdev, md->disk); if (r) goto out_blkdev_put; } td->dm_dev.mode = mode; - td->dm_dev.bdev = bdev_handle->bdev; - td->dm_dev.bdev_handle = bdev_handle; - td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev, &part_off, + td->dm_dev.bdev = bdev; + td->dm_dev.bdev_file = bdev_file; + td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL); format_dev_t(td->dm_dev.name, dev); list_add(&td->list, &md->table_devices); return td; out_blkdev_put: - bdev_release(bdev_handle); + fput(bdev_file); out_free_td: kfree(td); return ERR_PTR(r); @@ -775,7 +778,7 @@ static void close_table_device(struct table_device *td, struct mapped_device *md { if (md->disk->slave_dir) bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); - bdev_release(td->dm_dev.bdev_handle); + fput(td->dm_dev.bdev_file); put_dax(td->dm_dev.dax_dev); list_del(&td->list); kfree(td); diff --git a/drivers/md/md.c b/drivers/md/md.c index 2266358d8074..0653584db63b 100644 --- a/drivers/md/md.c +++ 
b/drivers/md/md.c @@ -2578,7 +2578,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - bdev_release(rdev->bdev_handle); + fput(rdev->bdev_file); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -3773,16 +3773,16 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe if (err) goto out_clear_rdev; - rdev->bdev_handle = bdev_open_by_dev(newdev, + rdev->bdev_file = bdev_file_open_by_dev(newdev, BLK_OPEN_READ | BLK_OPEN_WRITE, super_format == -2 ? &claim_rdev : rdev, NULL); - if (IS_ERR(rdev->bdev_handle)) { + if (IS_ERR(rdev->bdev_file)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); - err = PTR_ERR(rdev->bdev_handle); + err = PTR_ERR(rdev->bdev_file); goto out_clear_rdev; } - rdev->bdev = rdev->bdev_handle->bdev; + rdev->bdev = file_bdev(rdev->bdev_file); kobject_init(&rdev->kobj, &rdev_ktype); @@ -3813,7 +3813,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe return rdev; out_blkdev_put: - bdev_release(rdev->bdev_handle); + fput(rdev->bdev_file); out_clear_rdev: md_rdev_clear(rdev); out_free_rdev: diff --git a/drivers/md/md.h b/drivers/md/md.h index 8d881cc59799..a079ee9b6190 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -59,7 +59,7 @@ struct md_rdev { */ struct block_device *meta_bdev; struct block_device *bdev; /* block device handle */ - struct bdev_handle *bdev_handle; /* Handle from open for bdev */ + struct file *bdev_file; /* Handle from open for bdev */ struct page *sb_page, *bb_page; int sb_loaded; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 772ab4d74d94..82b2195efaca 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,7 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file 
*bdev_file; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; -- cgit v1.2.3 From 20e6a8d0dcdc70c0a79ace13e9915e5d4fd7c611 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:25 +0100 Subject: drbd: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-8-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_nl.c | 58 +++++++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c21e3732759e..94dc0a235919 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -524,9 +524,9 @@ struct drbd_md { struct drbd_backing_dev { struct block_device *backing_bdev; - struct bdev_handle *backing_bdev_handle; + struct file *backing_bdev_file; struct block_device *md_bdev; - struct bdev_handle *md_bdev_handle; + struct file *f_md_bdev; struct drbd_md md; struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */ sector_t known_size; /* last known size of that backing device */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43747a1aae43..6aed67278e8b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1635,45 +1635,45 @@ success: return 0; } -static struct bdev_handle *open_backing_dev(struct drbd_device *device, +static struct file *open_backing_dev(struct drbd_device *device, const char *bdev_path, void *claim_ptr, bool do_bd_link) { - struct bdev_handle *handle; + struct file *file; int err = 0; - handle = bdev_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, - claim_ptr, NULL); - if (IS_ERR(handle)) { + file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, + claim_ptr, NULL); + if (IS_ERR(file)) { drbd_err(device, "open(\"%s\") failed with %ld\n", - 
bdev_path, PTR_ERR(handle)); - return handle; + bdev_path, PTR_ERR(file)); + return file; } if (!do_bd_link) - return handle; + return file; - err = bd_link_disk_holder(handle->bdev, device->vdisk); + err = bd_link_disk_holder(file_bdev(file), device->vdisk); if (err) { - bdev_release(handle); + fput(file); drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", bdev_path, err); - handle = ERR_PTR(err); + file = ERR_PTR(err); } - return handle; + return file; } static int open_backing_devices(struct drbd_device *device, struct disk_conf *new_disk_conf, struct drbd_backing_dev *nbc) { - struct bdev_handle *handle; + struct file *file; - handle = open_backing_dev(device, new_disk_conf->backing_dev, device, + file = open_backing_dev(device, new_disk_conf->backing_dev, device, true); - if (IS_ERR(handle)) + if (IS_ERR(file)) return ERR_OPEN_DISK; - nbc->backing_bdev = handle->bdev; - nbc->backing_bdev_handle = handle; + nbc->backing_bdev = file_bdev(file); + nbc->backing_bdev_file = file; /* * meta_dev_idx >= 0: external fixed size, possibly multiple @@ -1683,7 +1683,7 @@ static int open_backing_devices(struct drbd_device *device, * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - handle = open_backing_dev(device, new_disk_conf->meta_dev, + file = open_backing_dev(device, new_disk_conf->meta_dev, /* claim ptr: device, if claimed exclusively; shared drbd_m_holder, * if potentially shared with other drbd minors */ (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder, @@ -1691,21 +1691,21 @@ static int open_backing_devices(struct drbd_device *device, * as would happen with internal metadata. 
*/ (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT && new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL)); - if (IS_ERR(handle)) + if (IS_ERR(file)) return ERR_OPEN_MD_DISK; - nbc->md_bdev = handle->bdev; - nbc->md_bdev_handle = handle; + nbc->md_bdev = file_bdev(file); + nbc->f_md_bdev = file; return NO_ERROR; } static void close_backing_dev(struct drbd_device *device, - struct bdev_handle *handle, bool do_bd_unlink) + struct file *bdev_file, bool do_bd_unlink) { - if (!handle) + if (!bdev_file) return; if (do_bd_unlink) - bd_unlink_disk_holder(handle->bdev, device->vdisk); - bdev_release(handle); + bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk); + fput(bdev_file); } void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) @@ -1713,9 +1713,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev * if (ldev == NULL) return; - close_backing_dev(device, ldev->md_bdev_handle, + close_backing_dev(device, ldev->f_md_bdev, ldev->md_bdev != ldev->backing_bdev); - close_backing_dev(device, ldev->backing_bdev_handle, true); + close_backing_dev(device, ldev->backing_bdev_file, true); kfree(ldev->disk_conf); kfree(ldev); @@ -2131,9 +2131,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) fail: conn_reconfig_done(connection); if (nbc) { - close_backing_dev(device, nbc->md_bdev_handle, + close_backing_dev(device, nbc->f_md_bdev, nbc->md_bdev != nbc->backing_bdev); - close_backing_dev(device, nbc->backing_bdev_handle, true); + close_backing_dev(device, nbc->backing_bdev_file, true); kfree(nbc); } kfree(new_disk_conf); -- cgit v1.2.3 From 05fb1dbc821f3016a52621ccd4530c269b626130 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:26 +0100 Subject: pktcdvd: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-9-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/block/pktcdvd.c | 68 
++++++++++++++++++++++++------------------------- include/linux/pktcdvd.h | 4 +-- 2 files changed, 36 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d56d972aadb3..c21444716e43 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -340,8 +340,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n", pd->disk->disk_name, MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev), - MAJOR(pd->bdev_handle->bdev->bd_dev), - MINOR(pd->bdev_handle->bdev->bd_dev)); + MAJOR(file_bdev(pd->bdev_file)->bd_dev), + MINOR(file_bdev(pd->bdev_file)->bd_dev)); } mutex_unlock(&ctl_mutex); return n; @@ -438,7 +438,7 @@ static int pkt_seq_show(struct seq_file *m, void *p) int states[PACKET_NUM_STATES]; seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name, - pd->bdev_handle->bdev); + file_bdev(pd->bdev_file)); seq_printf(m, "\nSettings:\n"); seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); @@ -715,7 +715,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod */ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) { - struct request_queue *q = bdev_get_queue(pd->bdev_handle->bdev); + struct request_queue *q = bdev_get_queue(file_bdev(pd->bdev_file)); struct scsi_cmnd *scmd; struct request *rq; int ret = 0; @@ -1048,7 +1048,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) continue; bio = pkt->r_bios[f]; - bio_init(bio, pd->bdev_handle->bdev, bio->bi_inline_vecs, 1, + bio_init(bio, file_bdev(pd->bdev_file), bio->bi_inline_vecs, 1, REQ_OP_READ); bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); bio->bi_end_io = pkt_end_io_read; @@ -1264,7 +1264,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) struct device *ddev = disk_to_dev(pd->disk); int f; - bio_init(pkt->w_bio, 
pd->bdev_handle->bdev, pkt->w_bio->bi_inline_vecs, + bio_init(pkt->w_bio, file_bdev(pd->bdev_file), pkt->w_bio->bi_inline_vecs, pkt->frames, REQ_OP_WRITE); pkt->w_bio->bi_iter.bi_sector = pkt->sector; pkt->w_bio->bi_end_io = pkt_end_io_packet_write; @@ -2162,20 +2162,20 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) int ret; long lba; struct request_queue *q; - struct bdev_handle *bdev_handle; + struct file *bdev_file; /* * We need to re-open the cdrom device without O_NONBLOCK to be able * to read/write from/to it. It is already opened in O_NONBLOCK mode * so open should not fail. */ - bdev_handle = bdev_open_by_dev(pd->bdev_handle->bdev->bd_dev, + bdev_file = bdev_file_open_by_dev(file_bdev(pd->bdev_file)->bd_dev, BLK_OPEN_READ, pd, NULL); - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); goto out; } - pd->open_bdev_handle = bdev_handle; + pd->f_open_bdev = bdev_file; ret = pkt_get_last_written(pd, &lba); if (ret) { @@ -2184,9 +2184,9 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) } set_capacity(pd->disk, lba << 2); - set_capacity_and_notify(pd->bdev_handle->bdev->bd_disk, lba << 2); + set_capacity_and_notify(file_bdev(pd->bdev_file)->bd_disk, lba << 2); - q = bdev_get_queue(pd->bdev_handle->bdev); + q = bdev_get_queue(file_bdev(pd->bdev_file)); if (write) { ret = pkt_open_write(pd); if (ret) @@ -2218,7 +2218,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) return 0; out_putdev: - bdev_release(bdev_handle); + fput(bdev_file); out: return ret; } @@ -2237,8 +2237,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - bdev_release(pd->open_bdev_handle); - pd->open_bdev_handle = NULL; + fput(pd->f_open_bdev); + pd->f_open_bdev = NULL; pkt_shrink_pktlist(pd); } @@ -2326,7 +2326,7 @@ static void pkt_end_io_read_cloned(struct bio *bio) static void 
pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) { - struct bio *cloned_bio = bio_alloc_clone(pd->bdev_handle->bdev, bio, + struct bio *cloned_bio = bio_alloc_clone(file_bdev(pd->bdev_file), bio, GFP_NOIO, &pkt_bio_set); struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); @@ -2497,7 +2497,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { struct device *ddev = disk_to_dev(pd->disk); int i; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct scsi_device *sdev; if (pd->pkt_dev == dev) { @@ -2508,9 +2508,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) struct pktcdvd_device *pd2 = pkt_devs[i]; if (!pd2) continue; - if (pd2->bdev_handle->bdev->bd_dev == dev) { + if (file_bdev(pd2->bdev_file)->bd_dev == dev) { dev_err(ddev, "%pg already setup\n", - pd2->bdev_handle->bdev); + file_bdev(pd2->bdev_file)); return -EBUSY; } if (pd2->pkt_dev == dev) { @@ -2519,13 +2519,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } } - bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, + bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL, NULL); - if (IS_ERR(bdev_handle)) - return PTR_ERR(bdev_handle); - sdev = scsi_device_from_queue(bdev_handle->bdev->bd_disk->queue); + if (IS_ERR(bdev_file)) + return PTR_ERR(bdev_file); + sdev = scsi_device_from_queue(file_bdev(bdev_file)->bd_disk->queue); if (!sdev) { - bdev_release(bdev_handle); + fput(bdev_file); return -EINVAL; } put_device(&sdev->sdev_gendev); @@ -2533,8 +2533,8 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) /* This is safe, since we have a reference from open(). 
*/ __module_get(THIS_MODULE); - pd->bdev_handle = bdev_handle; - set_blocksize(bdev_handle->bdev, CD_FRAMESIZE); + pd->bdev_file = bdev_file; + set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE); pkt_init_queue(pd); @@ -2546,11 +2546,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd); - dev_notice(ddev, "writer mapped to %pg\n", bdev_handle->bdev); + dev_notice(ddev, "writer mapped to %pg\n", file_bdev(bdev_file)); return 0; out_mem: - bdev_release(bdev_handle); + fput(bdev_file); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return -ENOMEM; @@ -2605,9 +2605,9 @@ static unsigned int pkt_check_events(struct gendisk *disk, if (!pd) return 0; - if (!pd->bdev_handle) + if (!pd->bdev_file) return 0; - attached_disk = pd->bdev_handle->bdev->bd_disk; + attached_disk = file_bdev(pd->bdev_file)->bd_disk; if (!attached_disk || !attached_disk->fops->check_events) return 0; return attached_disk->fops->check_events(attached_disk, clearing); @@ -2692,7 +2692,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) goto out_mem2; /* inherit events of the host device */ - disk->events = pd->bdev_handle->bdev->bd_disk->events; + disk->events = file_bdev(pd->bdev_file)->bd_disk->events; ret = add_disk(disk); if (ret) @@ -2757,7 +2757,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - bdev_release(pd->bdev_handle); + fput(pd->bdev_file); remove_proc_entry(pd->disk->disk_name, pkt_proc); dev_notice(ddev, "writer unmapped\n"); @@ -2784,7 +2784,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); if (pd) { - ctrl_cmd->dev = new_encode_dev(pd->bdev_handle->bdev->bd_dev); + ctrl_cmd->dev = new_encode_dev(file_bdev(pd->bdev_file)->bd_dev); ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); } else { ctrl_cmd->dev = 0; diff --git 
a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 79594aeb160d..2f1b952d596a 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,9 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct bdev_handle *bdev_handle; /* dev attached */ + struct file *bdev_file; /* dev attached */ /* handle acquired for bdev during pkt_open_dev() */ - struct bdev_handle *open_bdev_handle; + struct file *f_open_bdev; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; -- cgit v1.2.3 From a34606a9aa3d36d178a5ecf31a0299b88102cf12 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:27 +0100 Subject: rnbd: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-10-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/block/rnbd/rnbd-srv.c | 28 ++++++++++++++-------------- drivers/block/rnbd/rnbd-srv.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 3a0d5dcec6f2..f6e3a3c4b76c 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -145,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - bio = bio_alloc(sess_dev->bdev_handle->bdev, 1, + bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1, rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL); if (bio_add_page(bio, virt_to_page(data), datalen, offset_in_page(data)) != datalen) { @@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ - bdev_release(sess_dev->bdev_handle); + fput(sess_dev->bdev_file); mutex_lock(&sess_dev->dev->lock); list_del(&sess_dev->dev_list); if (!sess_dev->readonly) @@ -534,7 +534,7 @@ 
rnbd_srv_get_or_create_srv_dev(struct block_device *bdev, static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { - struct block_device *bdev = sess_dev->bdev_handle->bdev; + struct block_device *bdev = file_bdev(sess_dev->bdev_file); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = cpu_to_le32(sess_dev->device_id); @@ -560,7 +560,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, static struct rnbd_srv_sess_dev * rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess, const struct rnbd_msg_open *open_msg, - struct bdev_handle *handle, bool readonly, + struct file *bdev_file, bool readonly, struct rnbd_srv_dev *srv_dev) { struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess); @@ -572,7 +572,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess, strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname)); - sdev->bdev_handle = handle; + sdev->bdev_file = bdev_file; sdev->sess = srv_sess; sdev->dev = srv_dev; sdev->readonly = readonly; @@ -678,7 +678,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, struct rnbd_srv_dev *srv_dev; struct rnbd_srv_sess_dev *srv_sess_dev; const struct rnbd_msg_open *open_msg = msg; - struct bdev_handle *bdev_handle; + struct file *bdev_file; blk_mode_t open_flags = BLK_OPEN_READ; char *full_path; struct rnbd_msg_open_rsp *rsp = data; @@ -716,15 +716,15 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, goto reject; } - bdev_handle = bdev_open_by_path(full_path, open_flags, NULL, NULL); - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); + bdev_file = bdev_file_open_by_path(full_path, open_flags, NULL, NULL); + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %pe\n", - full_path, srv_sess->sessname, bdev_handle); + full_path, srv_sess->sessname, bdev_file); goto free_path; } - 
srv_dev = rnbd_srv_get_or_create_srv_dev(bdev_handle->bdev, srv_sess, + srv_dev = rnbd_srv_get_or_create_srv_dev(file_bdev(bdev_file), srv_sess, open_msg->access_mode); if (IS_ERR(srv_dev)) { pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %pe\n", @@ -734,7 +734,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, } srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, - bdev_handle, + bdev_file, open_msg->access_mode == RNBD_ACCESS_RO, srv_dev); if (IS_ERR(srv_sess_dev)) { @@ -750,7 +750,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, */ mutex_lock(&srv_dev->lock); if (!srv_dev->dev_kobj.state_in_sysfs) { - ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev_handle->bdev); + ret = rnbd_srv_create_dev_sysfs(srv_dev, file_bdev(bdev_file)); if (ret) { mutex_unlock(&srv_dev->lock); rnbd_srv_err(srv_sess_dev, @@ -793,7 +793,7 @@ srv_dev_put: } rnbd_put_srv_dev(srv_dev); blkdev_put: - bdev_release(bdev_handle); + fput(bdev_file); free_path: kfree(full_path); reject: diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index 343cc682b617..18d873808b8d 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -46,7 +46,7 @@ struct rnbd_srv_dev { struct rnbd_srv_sess_dev { /* Entry inside rnbd_srv_dev struct */ struct list_head dev_list; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct rnbd_srv_session *sess; struct rnbd_srv_dev *dev; struct kobject kobj; -- cgit v1.2.3 From 217759bbb95db7998528eb65914c3e20d890dff5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:28 +0100 Subject: xen: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-11-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/block/xen-blkback/blkback.c | 4 ++-- drivers/block/xen-blkback/common.h | 4 ++-- drivers/block/xen-blkback/xenbus.c | 37 
++++++++++++++++++------------------- 3 files changed, 22 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4defd7f387c7..944576d582fb 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -465,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, } req->dev = vbd->pdevice; - req->bdev = vbd->bdev_handle->bdev; + req->bdev = file_bdev(vbd->bdev_file); rc = 0; out: @@ -969,7 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, int err = 0; int status = BLKIF_RSP_OKAY; struct xen_blkif *blkif = ring->blkif; - struct block_device *bdev = blkif->vbd.bdev_handle->bdev; + struct block_device *bdev = file_bdev(blkif->vbd.bdev_file); struct phys_req preq; xen_blkif_get(blkif); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1432c83183d0..b427d54bc120 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -221,7 +221,7 @@ struct xen_vbd { unsigned char type; /* phys device that this vbd maps to. */ u32 pdevice; - struct bdev_handle *bdev_handle; + struct file *bdev_file; /* Cached size parameter. */ sector_t size; unsigned int flush_support:1; @@ -360,7 +360,7 @@ struct pending_req { }; -#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev_handle->bdev) +#define vbd_sz(_v) bdev_nr_sectors(file_bdev((_v)->bdev_file)) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e34219ea2b05..0621878940ae 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -81,7 +81,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) int i; /* Not ready to connect? 
*/ - if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_handle) + if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_file) return; /* Already connected? */ @@ -99,13 +99,12 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) return; } - err = sync_blockdev(blkif->vbd.bdev_handle->bdev); + err = sync_blockdev(file_bdev(blkif->vbd.bdev_file)); if (err) { xenbus_dev_error(blkif->be->dev, err, "block flush"); return; } - invalidate_inode_pages2( - blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping); + invalidate_inode_pages2(blkif->vbd.bdev_file->f_mapping); for (i = 0; i < blkif->nr_rings; i++) { ring = &blkif->rings[i]; @@ -473,9 +472,9 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) static void xen_vbd_free(struct xen_vbd *vbd) { - if (vbd->bdev_handle) - bdev_release(vbd->bdev_handle); - vbd->bdev_handle = NULL; + if (vbd->bdev_file) + fput(vbd->bdev_file); + vbd->bdev_file = NULL; } static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, @@ -483,7 +482,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, int cdrom) { struct xen_vbd *vbd; - struct bdev_handle *bdev_handle; + struct file *bdev_file; vbd = &blkif->vbd; vbd->handle = handle; @@ -492,17 +491,17 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->pdevice = MKDEV(major, minor); - bdev_handle = bdev_open_by_dev(vbd->pdevice, vbd->readonly ? + bdev_file = bdev_file_open_by_dev(vbd->pdevice, vbd->readonly ? 
BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { pr_warn("xen_vbd_create: device %08x could not be opened\n", vbd->pdevice); return -ENOENT; } - vbd->bdev_handle = bdev_handle; - if (vbd->bdev_handle->bdev->bd_disk == NULL) { + vbd->bdev_file = bdev_file; + if (file_bdev(vbd->bdev_file)->bd_disk == NULL) { pr_warn("xen_vbd_create: device %08x doesn't exist\n", vbd->pdevice); xen_vbd_free(vbd); @@ -510,14 +509,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, } vbd->size = vbd_sz(vbd); - if (cdrom || disk_to_cdi(vbd->bdev_handle->bdev->bd_disk)) + if (cdrom || disk_to_cdi(file_bdev(vbd->bdev_file)->bd_disk)) vbd->type |= VDISK_CDROM; - if (vbd->bdev_handle->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + if (file_bdev(vbd->bdev_file)->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; - if (bdev_write_cache(bdev_handle->bdev)) + if (bdev_write_cache(file_bdev(bdev_file))) vbd->flush_support = true; - if (bdev_max_secure_erase_sectors(bdev_handle->bdev)) + if (bdev_max_secure_erase_sectors(file_bdev(bdev_file))) vbd->discard_secure = true; pr_debug("Successful creation of handle=%04x (dom=%u)\n", @@ -570,7 +569,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info struct xen_blkif *blkif = be->blkif; int err; int state = 0; - struct block_device *bdev = be->blkif->vbd.bdev_handle->bdev; + struct block_device *bdev = file_bdev(be->blkif->vbd.bdev_file); if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1)) return; @@ -932,7 +931,7 @@ again: } err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", (unsigned long)bdev_logical_block_size( - be->blkif->vbd.bdev_handle->bdev)); + file_bdev(be->blkif->vbd.bdev_file))); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sector-size", dev->nodename); @@ -940,7 +939,7 @@ again: } err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", bdev_physical_block_size( - 
be->blkif->vbd.bdev_handle->bdev)); + file_bdev(be->blkif->vbd.bdev_file))); if (err) xenbus_dev_error(dev, err, "writing %s/physical-sector-size", dev->nodename); -- cgit v1.2.3 From be914f8fd282ecce5d8e2dd098e628dcc7ad533a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:29 +0100 Subject: zram: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-12-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/block/zram/zram_drv.c | 26 +++++++++++++------------- drivers/block/zram/zram_drv.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6772e0c654fa..d96b3851b5d3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -426,11 +426,11 @@ static void reset_bdev(struct zram *zram) if (!zram->backing_dev) return; - bdev_release(zram->bdev_handle); + fput(zram->bdev_file); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->bdev_handle = NULL; + zram->bdev_file = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; @@ -476,7 +476,7 @@ static ssize_t backing_dev_store(struct device *dev, struct address_space *mapping; unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; - struct bdev_handle *bdev_handle = NULL; + struct file *bdev_file = NULL; int err; struct zram *zram = dev_to_zram(dev); @@ -513,11 +513,11 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev_handle = bdev_open_by_dev(inode->i_rdev, + bdev_file = bdev_file_open_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL); - if (IS_ERR(bdev_handle)) { - err = PTR_ERR(bdev_handle); - bdev_handle = NULL; + if (IS_ERR(bdev_file)) { + err = PTR_ERR(bdev_file); + bdev_file = NULL; goto out; } @@ -531,7 +531,7 @@ static ssize_t backing_dev_store(struct 
device *dev, reset_bdev(zram); - zram->bdev_handle = bdev_handle; + zram->bdev_file = bdev_file; zram->backing_dev = backing_dev; zram->bitmap = bitmap; zram->nr_pages = nr_pages; @@ -544,8 +544,8 @@ static ssize_t backing_dev_store(struct device *dev, out: kvfree(bitmap); - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); if (backing_dev) filp_close(backing_dev, NULL); @@ -587,7 +587,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page, { struct bio *bio; - bio = bio_alloc(zram->bdev_handle->bdev, 1, parent->bi_opf, GFP_NOIO); + bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO); bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); __bio_add_page(bio, page, PAGE_SIZE, 0); bio_chain(bio, parent); @@ -703,7 +703,7 @@ static ssize_t writeback_store(struct device *dev, continue; } - bio_init(&bio, zram->bdev_handle->bdev, &bio_vec, 1, + bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); __bio_add_page(&bio, page, PAGE_SIZE, 0); @@ -785,7 +785,7 @@ static void zram_sync_read(struct work_struct *work) struct bio_vec bv; struct bio bio; - bio_init(&bio, zw->zram->bdev_handle->bdev, &bv, 1, REQ_OP_READ); + bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ); bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); zw->error = submit_bio_wait(&bio); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 3b94d12f41b4..37bf29f34d26 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -132,7 +132,7 @@ struct zram { spinlock_t wb_limit_lock; bool wb_limit_enable; u64 bd_wb_limit; - struct bdev_handle *bdev_handle; + struct file *bdev_file; unsigned long *bitmap; unsigned long nr_pages; #endif -- cgit v1.2.3 From 3789fb8746dc0f484db2d6f48b390f0f535d5120 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: 
Tue, 23 Jan 2024 14:26:30 +0100 Subject: bcache: port block device access to files Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-13-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/md/bcache/bcache.h | 4 +-- drivers/md/bcache/super.c | 74 +++++++++++++++++++++++----------------------- 2 files changed, 39 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6ae2329052c9..4e6afa89921f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -300,7 +300,7 @@ struct cached_dev { struct list_head list; struct bcache_device disk; struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct cache_sb sb; struct cache_sb_disk *sb_disk; @@ -423,7 +423,7 @@ struct cache { struct kobject kobj; struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct task_struct *alloc_thread; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dc3f50f69714..d00b3abab133 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,8 +1369,8 @@ static CLOSURE_CALLBACK(cached_dev_free) if (dc->sb_disk) put_page(virt_to_page(dc->sb_disk)); - if (dc->bdev_handle) - bdev_release(dc->bdev_handle); + if (dc->bdev_file) + fput(dc->bdev_file); wake_up(&unregister_wait); @@ -1445,7 +1445,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) /* Cached device - bcache superblock */ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, - struct bdev_handle *bdev_handle, + struct file *bdev_file, struct cached_dev *dc) { const char *err = "cannot allocate memory"; @@ -1453,8 +1453,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, int ret = -ENOMEM; memcpy(&dc->sb, sb, sizeof(struct cache_sb)); - dc->bdev_handle = bdev_handle; - dc->bdev = bdev_handle->bdev; + dc->bdev_file 
= bdev_file; + dc->bdev = file_bdev(bdev_file); dc->sb_disk = sb_disk; if (cached_dev_init(dc, sb->block_size << 9)) @@ -2218,8 +2218,8 @@ void bch_cache_release(struct kobject *kobj) if (ca->sb_disk) put_page(virt_to_page(ca->sb_disk)); - if (ca->bdev_handle) - bdev_release(ca->bdev_handle); + if (ca->bdev_file) + fput(ca->bdev_file); kfree(ca); module_put(THIS_MODULE); @@ -2339,18 +2339,18 @@ err_free: } static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, - struct bdev_handle *bdev_handle, + struct file *bdev_file, struct cache *ca) { const char *err = NULL; /* must be set for any error case */ int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); - ca->bdev_handle = bdev_handle; - ca->bdev = bdev_handle->bdev; + ca->bdev_file = bdev_file; + ca->bdev = file_bdev(bdev_file); ca->sb_disk = sb_disk; - if (bdev_max_discard_sectors((bdev_handle->bdev))) + if (bdev_max_discard_sectors(file_bdev(bdev_file))) ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); @@ -2361,20 +2361,20 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, err = "cache_alloc(): cache device is too small"; else err = "cache_alloc(): unknown error"; - pr_notice("error %pg: %s\n", bdev_handle->bdev, err); + pr_notice("error %pg: %s\n", file_bdev(bdev_file), err); /* * If we failed here, it means ca->kobj is not initialized yet, * kobject_put() won't be called and there is no chance to - * call bdev_release() to bdev in bch_cache_release(). So - * we explicitly call bdev_release() here. + * call fput() to bdev in bch_cache_release(). So + * we explicitly call fput() on the block device here. 
*/ - bdev_release(bdev_handle); + fput(bdev_file); return ret; } - if (kobject_add(&ca->kobj, bdev_kobj(bdev_handle->bdev), "bcache")) { + if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) { pr_notice("error %pg: error calling kobject_add\n", - bdev_handle->bdev); + file_bdev(bdev_file)); ret = -ENOMEM; goto out; } @@ -2388,7 +2388,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto out; } - pr_info("registered cache device %pg\n", ca->bdev_handle->bdev); + pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file)); out: kobject_put(&ca->kobj); @@ -2446,7 +2446,7 @@ struct async_reg_args { char *path; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct bdev_handle *bdev_handle; + struct file *bdev_file; void *holder; }; @@ -2457,7 +2457,7 @@ static void register_bdev_worker(struct work_struct *work) container_of(work, struct async_reg_args, reg_work.work); mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev_handle, + if (register_bdev(args->sb, args->sb_disk, args->bdev_file, args->holder) < 0) fail = true; mutex_unlock(&bch_register_lock); @@ -2478,7 +2478,7 @@ static void register_cache_worker(struct work_struct *work) container_of(work, struct async_reg_args, reg_work.work); /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev_handle, + if (register_cache(args->sb, args->sb_disk, args->bdev_file, args->holder)) fail = true; @@ -2516,7 +2516,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct bdev_handle *bdev_handle, *bdev_handle2; + struct file *bdev_file, *bdev_file2; void *holder = NULL; ssize_t ret; bool async_registration = false; @@ -2549,15 +2549,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - 
bdev_handle = bdev_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); - if (IS_ERR(bdev_handle)) + bdev_file = bdev_file_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(bdev_file)) goto out_free_sb; err = "failed to set blocksize"; - if (set_blocksize(bdev_handle->bdev, 4096)) + if (set_blocksize(file_bdev(bdev_file), 4096)) goto out_blkdev_put; - err = read_super(sb, bdev_handle->bdev, &sb_disk); + err = read_super(sb, file_bdev(bdev_file), &sb_disk); if (err) goto out_blkdev_put; @@ -2569,13 +2569,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, } /* Now reopen in exclusive mode with proper holder */ - bdev_handle2 = bdev_open_by_dev(bdev_handle->bdev->bd_dev, + bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL); - bdev_release(bdev_handle); - bdev_handle = bdev_handle2; - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); - bdev_handle = NULL; + fput(bdev_file); + bdev_file = bdev_file2; + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); + bdev_file = NULL; if (ret == -EBUSY) { dev_t dev; @@ -2610,7 +2610,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, args->path = path; args->sb = sb; args->sb_disk = sb_disk; - args->bdev_handle = bdev_handle; + args->bdev_file = bdev_file; args->holder = holder; register_device_async(args); /* No wait and returns to user space */ @@ -2619,14 +2619,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (SB_IS_BDEV(sb)) { mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev_handle, holder); + ret = register_bdev(sb, sb_disk, bdev_file, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev_handle, holder); + ret = register_cache(sb, 
sb_disk, bdev_file, holder); if (ret) goto out_free_sb; } @@ -2642,8 +2642,8 @@ out_free_holder: out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); out_free_sb: kfree(sb); out_free_path: -- cgit v1.2.3 From f9d8c3c4236ef2a1c15b01200b9d164e2c5a54c7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:31 +0100 Subject: block2mtd: port device access to files Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-14-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/mtd/devices/block2mtd.c | 46 +++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index aa44a23ec045..97a00ec9a4d4 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -37,7 +37,7 @@ /* Info for the block device */ struct block2mtd_dev { struct list_head list; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct mtd_info mtd; struct mutex write_mutex; }; @@ -55,8 +55,7 @@ static struct page *page_read(struct address_space *mapping, pgoff_t index) /* erase a specified part of the device */ static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len) { - struct address_space *mapping = - dev->bdev_handle->bdev->bd_inode->i_mapping; + struct address_space *mapping = dev->bdev_file->f_mapping; struct page *page; pgoff_t index = to >> PAGE_SHIFT; // page index int pages = len >> PAGE_SHIFT; @@ -106,8 +105,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct block2mtd_dev *dev = mtd->priv; - struct address_space *mapping = - dev->bdev_handle->bdev->bd_inode->i_mapping; + struct address_space *mapping = dev->bdev_file->f_mapping; struct page *page; pgoff_t index = from >> PAGE_SHIFT; 
int offset = from & (PAGE_SIZE-1); @@ -142,8 +140,7 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf, loff_t to, size_t len, size_t *retlen) { struct page *page; - struct address_space *mapping = - dev->bdev_handle->bdev->bd_inode->i_mapping; + struct address_space *mapping = dev->bdev_file->f_mapping; pgoff_t index = to >> PAGE_SHIFT; // page index int offset = to & ~PAGE_MASK; // page offset int cpylen; @@ -198,7 +195,7 @@ static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len, static void block2mtd_sync(struct mtd_info *mtd) { struct block2mtd_dev *dev = mtd->priv; - sync_blockdev(dev->bdev_handle->bdev); + sync_blockdev(file_bdev(dev->bdev_file)); return; } @@ -210,10 +207,9 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) kfree(dev->mtd.name); - if (dev->bdev_handle) { - invalidate_mapping_pages( - dev->bdev_handle->bdev->bd_inode->i_mapping, 0, -1); - bdev_release(dev->bdev_handle); + if (dev->bdev_file) { + invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1); + fput(dev->bdev_file); } kfree(dev); @@ -223,10 +219,10 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) * This function is marked __ref because it calls the __init marked * early_lookup_bdev when called from the early boot code. */ -static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname, +static struct file __ref *mdtblock_early_get_bdev(const char *devname, blk_mode_t mode, int timeout, struct block2mtd_dev *dev) { - struct bdev_handle *bdev_handle = ERR_PTR(-ENODEV); + struct file *bdev_file = ERR_PTR(-ENODEV); #ifndef MODULE int i; @@ -234,7 +230,7 @@ static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname, * We can't use early_lookup_bdev from a running system. */ if (system_state >= SYSTEM_RUNNING) - return bdev_handle; + return bdev_file; /* * We might not have the root device mounted at this point. 
@@ -253,20 +249,20 @@ static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname, wait_for_device_probe(); if (!early_lookup_bdev(devname, &devt)) { - bdev_handle = bdev_open_by_dev(devt, mode, dev, NULL); - if (!IS_ERR(bdev_handle)) + bdev_file = bdev_file_open_by_dev(devt, mode, dev, NULL); + if (!IS_ERR(bdev_file)) break; } } #endif - return bdev_handle; + return bdev_file; } static struct block2mtd_dev *add_device(char *devname, int erase_size, char *label, int timeout) { const blk_mode_t mode = BLK_OPEN_READ | BLK_OPEN_WRITE; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; struct block2mtd_dev *dev; char *name; @@ -279,16 +275,16 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, return NULL; /* Get a handle on the device */ - bdev_handle = bdev_open_by_path(devname, mode, dev, NULL); - if (IS_ERR(bdev_handle)) - bdev_handle = mdtblock_early_get_bdev(devname, mode, timeout, + bdev_file = bdev_file_open_by_path(devname, mode, dev, NULL); + if (IS_ERR(bdev_file)) + bdev_file = mdtblock_early_get_bdev(devname, mode, timeout, dev); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { pr_err("error: cannot open device %s\n", devname); goto err_free_block2mtd; } - dev->bdev_handle = bdev_handle; - bdev = bdev_handle->bdev; + dev->bdev_file = bdev_file; + bdev = file_bdev(bdev_file); if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { pr_err("attempting to use an MTD device as a block device\n"); -- cgit v1.2.3 From e9a7254b0339ef2b230784789ac28ef35e049253 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:32 +0100 Subject: nvme: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-15-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/nvme/target/io-cmd-bdev.c | 16 ++++++++-------- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') 
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index f11400a908f2..6426aac2634a 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -50,10 +50,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) void nvmet_bdev_ns_disable(struct nvmet_ns *ns) { - if (ns->bdev_handle) { - bdev_release(ns->bdev_handle); + if (ns->bdev_file) { + fput(ns->bdev_file); ns->bdev = NULL; - ns->bdev_handle = NULL; + ns->bdev_file = NULL; } } @@ -85,18 +85,18 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) if (ns->buffered_io) return -ENOTBLK; - ns->bdev_handle = bdev_open_by_path(ns->device_path, + ns->bdev_file = bdev_file_open_by_path(ns->device_path, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); - if (IS_ERR(ns->bdev_handle)) { - ret = PTR_ERR(ns->bdev_handle); + if (IS_ERR(ns->bdev_file)) { + ret = PTR_ERR(ns->bdev_file); if (ret != -ENOTBLK) { pr_err("failed to open block device %s: (%d)\n", ns->device_path, ret); } - ns->bdev_handle = NULL; + ns->bdev_file = NULL; return ret; } - ns->bdev = ns->bdev_handle->bdev; + ns->bdev = file_bdev(ns->bdev_file); ns->size = bdev_nr_bytes(ns->bdev); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6c8acebe1a1a..33e61b4f478b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -58,7 +58,7 @@ struct nvmet_ns { struct percpu_ref ref; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; struct file *file; bool readonly; -- cgit v1.2.3 From c8e108d80cd8b2f079a79c8131fe594f12abf7f3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:33 +0100 Subject: s390: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-16-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/s390/block/dasd.c | 10 +++++----- 
drivers/s390/block/dasd_genhd.c | 36 ++++++++++++++++++------------------ drivers/s390/block/dasd_int.h | 2 +- drivers/s390/block/dasd_ioctl.c | 2 +- 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 7327e81352e9..c833a7c7d7b2 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -412,7 +412,7 @@ dasd_state_ready_to_online(struct dasd_device * device) KOBJ_CHANGE); return 0; } - disk_uevent(device->block->bdev_handle->bdev->bd_disk, + disk_uevent(file_bdev(device->block->bdev_file)->bd_disk, KOBJ_CHANGE); } return 0; @@ -433,7 +433,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) device->state = DASD_STATE_READY; if (device->block && !(device->features & DASD_FEATURE_USERAW)) - disk_uevent(device->block->bdev_handle->bdev->bd_disk, + disk_uevent(file_bdev(device->block->bdev_file)->bd_disk, KOBJ_CHANGE); return 0; } @@ -3588,7 +3588,7 @@ int dasd_generic_set_offline(struct ccw_device *cdev) * in the other openers. */ if (device->block) { - max_count = device->block->bdev_handle ? 0 : -1; + max_count = device->block->bdev_file ? 
0 : -1; open_count = atomic_read(&device->block->open_count); if (open_count > max_count) { if (open_count > 0) @@ -3634,8 +3634,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev) * so sync bdev first and then wait for our queues to become * empty */ - if (device->block && device->block->bdev_handle) - bdev_mark_dead(device->block->bdev_handle->bdev, false); + if (device->block && device->block->bdev_file) + bdev_mark_dead(file_bdev(device->block->bdev_file), false); dasd_schedule_device_bh(device); rc = wait_event_interruptible(shutdown_waitq, _wait_for_empty_queues(device)); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 55e3abe94cde..8bf2cf0ccc15 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -127,15 +127,15 @@ void dasd_gendisk_free(struct dasd_block *block) */ int dasd_scan_partitions(struct dasd_block *block) { - struct bdev_handle *bdev_handle; + struct file *bdev_file; int rc; - bdev_handle = bdev_open_by_dev(disk_devt(block->gdp), BLK_OPEN_READ, + bdev_file = bdev_file_open_by_dev(disk_devt(block->gdp), BLK_OPEN_READ, NULL, NULL); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { DBF_DEV_EVENT(DBF_ERR, block->base, "scan partitions error, blkdev_get returned %ld", - PTR_ERR(bdev_handle)); + PTR_ERR(bdev_file)); return -ENODEV; } @@ -147,15 +147,15 @@ int dasd_scan_partitions(struct dasd_block *block) "scan partitions error, rc %d", rc); /* - * Since the matching bdev_release() call to the - * bdev_open_by_path() in this function is not called before + * Since the matching fput() call to the + * bdev_file_open_by_path() in this function is not called before * dasd_destroy_partitions the offline open_count limit needs to be - * increased from 0 to 1. This is done by setting device->bdev_handle + * increased from 0 to 1. This is done by setting device->bdev_file * (see dasd_generic_set_offline). 
As long as the partition detection * is running no offline should be allowed. That is why the assignment - * to block->bdev_handle is done AFTER the BLKRRPART ioctl. + * to block->bdev_file is done AFTER the BLKRRPART ioctl. */ - block->bdev_handle = bdev_handle; + block->bdev_file = bdev_file; return 0; } @@ -165,21 +165,21 @@ int dasd_scan_partitions(struct dasd_block *block) */ void dasd_destroy_partitions(struct dasd_block *block) { - struct bdev_handle *bdev_handle; + struct file *bdev_file; /* - * Get the bdev_handle pointer from the device structure and clear - * device->bdev_handle to lower the offline open_count limit again. + * Get the bdev_file pointer from the device structure and clear + * device->bdev_file to lower the offline open_count limit again. */ - bdev_handle = block->bdev_handle; - block->bdev_handle = NULL; + bdev_file = block->bdev_file; + block->bdev_file = NULL; - mutex_lock(&bdev_handle->bdev->bd_disk->open_mutex); - bdev_disk_changed(bdev_handle->bdev->bd_disk, true); - mutex_unlock(&bdev_handle->bdev->bd_disk->open_mutex); + mutex_lock(&file_bdev(bdev_file)->bd_disk->open_mutex); + bdev_disk_changed(file_bdev(bdev_file)->bd_disk, true); + mutex_unlock(&file_bdev(bdev_file)->bd_disk->open_mutex); /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. 
*/ - bdev_release(bdev_handle); + fput(bdev_file); } int dasd_gendisk_init(void) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1b1b8a41c4d4..aecd502aec51 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -650,7 +650,7 @@ struct dasd_block { struct gendisk *gdp; spinlock_t request_queue_lock; struct blk_mq_tag_set tag_set; - struct bdev_handle *bdev_handle; + struct file *bdev_file; atomic_t open_count; unsigned long blocks; /* size of volume in blocks */ diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 61b9675e2a67..de85a5e4e21b 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -537,7 +537,7 @@ static int __dasd_ioctl_information(struct dasd_block *block, * This must be hidden from user-space. */ dasd_info->open_count = atomic_read(&block->open_count); - if (!block->bdev_handle) + if (!block->bdev_file) dasd_info->open_count++; /* -- cgit v1.2.3 From 034f0cf8fdf91cad6d1c66720e0a72fadcc1f711 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:34 +0100 Subject: target: port block device access to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-17-adbd023e19cc@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- drivers/target/target_core_iblock.c | 18 +++++++++--------- drivers/target/target_core_iblock.h | 2 +- drivers/target/target_core_pscsi.c | 22 +++++++++++----------- drivers/target/target_core_pscsi.h | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 8eb9eb7ce5df..7f6ca8177845 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -91,7 +91,7 @@ static int iblock_configure_device(struct se_device *dev) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct request_queue *q; - struct bdev_handle *bdev_handle; 
+ struct file *bdev_file; struct block_device *bd; struct blk_integrity *bi; blk_mode_t mode = BLK_OPEN_READ; @@ -117,14 +117,14 @@ static int iblock_configure_device(struct se_device *dev) else dev->dev_flags |= DF_READ_ONLY; - bdev_handle = bdev_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev, + bdev_file = bdev_file_open_by_path(ib_dev->ibd_udev_path, mode, ib_dev, NULL); - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); goto out_free_bioset; } - ib_dev->ibd_bdev_handle = bdev_handle; - ib_dev->ibd_bd = bd = bdev_handle->bdev; + ib_dev->ibd_bdev_file = bdev_file; + ib_dev->ibd_bd = bd = file_bdev(bdev_file); q = bdev_get_queue(bd); @@ -180,7 +180,7 @@ static int iblock_configure_device(struct se_device *dev) return 0; out_blkdev_put: - bdev_release(ib_dev->ibd_bdev_handle); + fput(ib_dev->ibd_bdev_file); out_free_bioset: bioset_exit(&ib_dev->ibd_bio_set); out: @@ -205,8 +205,8 @@ static void iblock_destroy_device(struct se_device *dev) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); - if (ib_dev->ibd_bdev_handle) - bdev_release(ib_dev->ibd_bdev_handle); + if (ib_dev->ibd_bdev_file) + fput(ib_dev->ibd_bdev_file); bioset_exit(&ib_dev->ibd_bio_set); } diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index 683f9a55945b..91f6f4280666 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -32,7 +32,7 @@ struct iblock_dev { u32 ibd_flags; struct bio_set ibd_bio_set; struct block_device *ibd_bd; - struct bdev_handle *ibd_bdev_handle; + struct file *ibd_bdev_file; bool ibd_readonly; struct iblock_dev_plug *ibd_plug; } ____cacheline_aligned; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 41b7489d37ce..9aedd682d10c 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -352,7 +352,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct 
scsi_device *sd) struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); struct Scsi_Host *sh = sd->host; - struct bdev_handle *bdev_handle; + struct file *bdev_file; int ret; if (scsi_device_get(sd)) { @@ -366,18 +366,18 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) * Claim exclusive struct block_device access to struct scsi_device * for TYPE_DISK and TYPE_ZBC using supplied udev_path */ - bdev_handle = bdev_open_by_path(dev->udev_path, + bdev_file = bdev_file_open_by_path(dev->udev_path, BLK_OPEN_WRITE | BLK_OPEN_READ, pdv, NULL); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { pr_err("pSCSI: bdev_open_by_path() failed\n"); scsi_device_put(sd); - return PTR_ERR(bdev_handle); + return PTR_ERR(bdev_file); } - pdv->pdv_bdev_handle = bdev_handle; + pdv->pdv_bdev_file = bdev_file; ret = pscsi_add_device_to_list(dev, sd); if (ret) { - bdev_release(bdev_handle); + fput(bdev_file); scsi_device_put(sd); return ret; } @@ -564,9 +564,9 @@ static void pscsi_destroy_device(struct se_device *dev) * from pscsi_create_type_disk() */ if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) && - pdv->pdv_bdev_handle) { - bdev_release(pdv->pdv_bdev_handle); - pdv->pdv_bdev_handle = NULL; + pdv->pdv_bdev_file) { + fput(pdv->pdv_bdev_file); + pdv->pdv_bdev_file = NULL; } /* * For HBA mode PHV_LLD_SCSI_HOST_NO, release the reference @@ -994,8 +994,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); - if (pdv->pdv_bdev_handle) - return bdev_nr_sectors(pdv->pdv_bdev_handle->bdev); + if (pdv->pdv_bdev_file) + return bdev_nr_sectors(file_bdev(pdv->pdv_bdev_file)); return 0; } diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h index b0a3ef136592..9acaa21e4c78 100644 --- a/drivers/target/target_core_pscsi.h +++ b/drivers/target/target_core_pscsi.h @@ -37,7 +37,7 @@ struct pscsi_dev_virt { int pdv_channel_id; int 
pdv_target_id; int pdv_lun_id; - struct bdev_handle *pdv_bdev_handle; + struct file *pdv_bdev_file; struct scsi_device *pdv_sd; struct Scsi_Host *pdv_lld_host; } ____cacheline_aligned; -- cgit v1.2.3 From daec424cc57b33a28f8621eb7ac85f8bd327bd6b Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Mon, 19 Feb 2024 15:47:41 +0000 Subject: iio: imu: inv_mpu6050: fix frequency setting when chip is off Track correctly FIFO state and apply ODR change before starting the chip. Without the fix, you cannot change ODR more than 1 time when data buffering is off. This restriction on a single pending ODR change should only apply when the FIFO is on. Fixes: 111e1abd0045 ("iio: imu: inv_mpu6050: use the common inv_sensors timestamp module") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240219154741.90601-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index 676704f9151f..e6e6e94452a3 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c @@ -111,6 +111,7 @@ int inv_mpu6050_prepare_fifo(struct inv_mpu6050_state *st, bool enable) if (enable) { /* reset timestamping */ inv_sensors_timestamp_reset(&st->timestamp); + inv_sensors_timestamp_apply_odr(&st->timestamp, 0, 0, 0); /* reset FIFO */ d = st->chip_config.user_ctrl | INV_MPU6050_BIT_FIFO_RST; ret = regmap_write(st->map, st->reg->user_ctrl, d); @@ -184,6 +185,10 @@ static int inv_mpu6050_set_enable(struct iio_dev *indio_dev, bool enable) if (result) goto error_power_off; } else { + st->chip_config.gyro_fifo_enable = 0; + st->chip_config.accl_fifo_enable = 0; + st->chip_config.temp_fifo_enable = 0; + st->chip_config.magn_fifo_enable = 0; result = inv_mpu6050_prepare_fifo(st, false); if 
(result) goto error_power_off; -- cgit v1.2.3 From a1c9f508db2543aae59ea2378b07a026f6c917cf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Feb 2024 09:29:39 -0800 Subject: iio: pressure: dlhl60d: Initialize empty DLH bytes 3 bytes were being read but 4 were being written. Explicitly initialize the unused bytes to 0 and refactor the loop to use direct array indexing, which appears to silence a Clang false positive warning[1]. Indent improvement included for readability of the fixed code. Link: https://github.com/ClangBuiltLinux/linux/issues/2000 [1] Fixes: ac78c6aa4a5d ("iio: pressure: Add driver for DLH pressure sensors") Signed-off-by: Kees Cook Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240223172936.it.875-kees@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/dlhl60d.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iio/pressure/dlhl60d.c b/drivers/iio/pressure/dlhl60d.c index 28c8269ba65d..0bba4c5a8d40 100644 --- a/drivers/iio/pressure/dlhl60d.c +++ b/drivers/iio/pressure/dlhl60d.c @@ -250,18 +250,17 @@ static irqreturn_t dlh_trigger_handler(int irq, void *private) struct dlh_state *st = iio_priv(indio_dev); int ret; unsigned int chn, i = 0; - __be32 tmp_buf[2]; + __be32 tmp_buf[2] = { }; ret = dlh_start_capture_and_read(st); if (ret) goto out; for_each_set_bit(chn, indio_dev->active_scan_mask, - indio_dev->masklength) { - memcpy(tmp_buf + i, + indio_dev->masklength) { + memcpy(&tmp_buf[i++], &st->rx_buf[1] + chn * DLH_NUM_DATA_BYTES, DLH_NUM_DATA_BYTES); - i++; } iio_push_to_buffers(indio_dev, tmp_buf); -- cgit v1.2.3 From 1b926914bbe4e30cb32f268893ef7d82a85275b8 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:50 +0200 Subject: iio: accel: adxl367: fix DEVID read after reset regmap_read_poll_timeout() will not sleep before reading, causing the first read to return -ENXIO on I2C, since the chip does not respond to it while it is 
being reset. The datasheet specifies that a soft reset operation has a latency of 7.5ms. Add a 15ms sleep between reset and reading the DEVID register, and switch to a simple regmap_read() call. Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-1-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 90b7ae6d42b7..484fe2e9fb17 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1429,9 +1429,11 @@ static int adxl367_verify_devid(struct adxl367_state *st) unsigned int val; int ret; - ret = regmap_read_poll_timeout(st->regmap, ADXL367_REG_DEVID, val, - val == ADXL367_DEVID_AD, 1000, 10000); + ret = regmap_read(st->regmap, ADXL367_REG_DEVID, &val); if (ret) + return dev_err_probe(st->dev, ret, "Failed to read dev id\n"); + + if (val != ADXL367_DEVID_AD) return dev_err_probe(st->dev, -ENODEV, "Invalid dev id 0x%02X, expected 0x%02X\n", val, ADXL367_DEVID_AD); @@ -1510,6 +1512,8 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, if (ret) return ret; + fsleep(15000); + ret = adxl367_verify_devid(st); if (ret) return ret; -- cgit v1.2.3 From 11dadb631007324c7a8bcb2650eda88ed2b9eed0 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 05:36:51 +0200 Subject: iio: accel: adxl367: fix I2C FIFO data register As specified in the datasheet, the I2C FIFO data register is 0x18, not 0x42. 0x42 was used by mistake when adapting the ADXL372 driver. Fix this mistake. 
Fixes: cbab791c5e2a ("iio: accel: add ADXL367 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207033657.206171-2-demonsingur@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c index b595fe94f3a3..62c74bdc0d77 100644 --- a/drivers/iio/accel/adxl367_i2c.c +++ b/drivers/iio/accel/adxl367_i2c.c @@ -11,7 +11,7 @@ #include "adxl367.h" -#define ADXL367_I2C_FIFO_DATA 0x42 +#define ADXL367_I2C_FIFO_DATA 0x18 struct adxl367_i2c_state { struct regmap *regmap; -- cgit v1.2.3 From cb2b7d6f8c96414e1ab63c5f6e89d1c66a8b1078 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 23 Feb 2024 13:44:42 +0530 Subject: drm/i915/hdcp: Move to direct reads for HDCP Even for MST scenarios we need to do direct reads only on the immediate downstream device; the rest of the authentication is taken care of by that device. Remote reads will only be used to check capability of the monitors in MST topology.
--v2 -Add fixes tag [Ankit] -Derive aux where needed rather than through a function [Ankit] Fixes: ae4f902bb344 ("drm/i915/hdcp: Send the correct aux for DPMST HDCP scenario") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Signed-off-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240223081453.1576918-3-suraj.kandpal@intel.com (cherry picked from commit 287c0de8b29489cdb20957980ca08c33ae4a67b9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 31 ++++++++-------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 3a595cd433d4..defc90936317 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -330,23 +330,13 @@ static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { 0, 0 }, }; -static struct drm_dp_aux * -intel_dp_hdcp_get_aux(struct intel_connector *connector) -{ - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - - if (intel_encoder_is_mst(connector->encoder)) - return &connector->port->aux; - else - return &dig_port->dp.aux; -} - static int intel_dp_hdcp2_read_rx_status(struct intel_connector *connector, u8 *rx_status) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; ssize_t ret; ret = drm_dp_dpcd_read(aux, @@ -454,8 +444,9 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector, unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_write, len; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; const struct hdcp2_dp_msg_data *hdcp2_msg_data; - struct drm_dp_aux *aux; hdcp2_msg_data = 
get_hdcp2_dp_msg_data(*byte); if (!hdcp2_msg_data) @@ -463,8 +454,6 @@ int intel_dp_hdcp2_write_msg(struct intel_connector *connector, offset = hdcp2_msg_data->offset; - aux = intel_dp_hdcp_get_aux(connector); - /* No msg_id in DP HDCP2.2 msgs */ bytes_to_write = size - 1; byte++; @@ -490,7 +479,8 @@ static ssize_t get_receiver_id_list_rx_info(struct intel_connector *connector, u32 *dev_cnt, u8 *byte) { - struct drm_dp_aux *aux = intel_dp_hdcp_get_aux(connector); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; ssize_t ret; u8 *rx_info = byte; @@ -516,7 +506,7 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); struct intel_hdcp *hdcp = &connector->hdcp; - struct drm_dp_aux *aux; + struct drm_dp_aux *aux = &dig_port->dp.aux; unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_recv, len; @@ -530,8 +520,6 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, return -EINVAL; offset = hdcp2_msg_data->offset; - aux = intel_dp_hdcp_get_aux(connector); - ret = intel_dp_hdcp2_wait_for_msg(connector, hdcp2_msg_data); if (ret < 0) return ret; @@ -651,12 +639,11 @@ static int intel_dp_hdcp2_capable(struct intel_connector *connector, bool *capable) { - struct drm_dp_aux *aux; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct drm_dp_aux *aux = &dig_port->dp.aux; u8 rx_caps[3]; int ret; - aux = intel_dp_hdcp_get_aux(connector); - *capable = false; ret = drm_dp_dpcd_read(aux, DP_HDCP_2_2_REG_RX_CAPS_OFFSET, -- cgit v1.2.3 From 20dfa63d7379408edfcae8bda8ef5ea44d7b357f Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 23 Feb 2024 13:44:49 +0530 Subject: drm/i915/hdcp: Remove additional timing for reading mst hdcp message Now that we have moved back to direct reads the additional timing is not 
required hence this can be removed. --v2 -Add Fixes tag [Ankit] Fixes: 3974f9c17bb9 ("drm/i915/hdcp: Adjust timeout for read in DPMST Scenario") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Signed-off-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240223081453.1576918-10-suraj.kandpal@intel.com (cherry picked from commit 429ccbd1c39baefc6114b482ae98c188f007afcd) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index defc90936317..a2c075b76728 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -549,13 +549,8 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, /* Entire msg read timeout since initiate of msg read */ if (bytes_to_recv == size - 1 && hdcp2_msg_data->msg_read_timeout > 0) { - if (intel_encoder_is_mst(connector->encoder)) - msg_end = ktime_add_ms(ktime_get_raw(), - hdcp2_msg_data->msg_read_timeout * - connector->port->parent->num_ports); - else - msg_end = ktime_add_ms(ktime_get_raw(), - hdcp2_msg_data->msg_read_timeout); + msg_end = ktime_add_ms(ktime_get_raw(), + hdcp2_msg_data->msg_read_timeout); } ret = drm_dp_dpcd_read(aux, offset, -- cgit v1.2.3 From fe9f801355f0b47668419f30f1fac1cf4539e736 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Feb 2024 15:12:10 -0800 Subject: net: veth: clear GRO when clearing XDP even when down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit veth sets NETIF_F_GRO automatically when XDP is enabled, because both features use the same NAPI machinery. The logic to clear NETIF_F_GRO sits in veth_disable_xdp() which is called both on ndo_stop and when XDP is turned off. 
To avoid the flag from being cleared when the device is brought down, the clearing is skipped when IFF_UP is not set. Bringing the device down should indeed not modify its features. Unfortunately, this means that clearing is also skipped when XDP is disabled _while_ the device is down. And there's nothing on the open path to bring the device features back into sync. IOW if user enables XDP, disables it and then brings the device up we'll end up with a stray GRO flag set but no NAPI instances. We don't depend on the GRO flag on the datapath, so the datapath won't crash. We will crash (or hang), however, next time features are sync'ed (either by user via ethtool or peer changing its config). The GRO flag will go away, and veth will try to disable the NAPIs. But the open path never created them since XDP was off, the GRO flag was a stray. If NAPI was initialized before we'll hang in napi_disable(). If it never was we'll crash trying to stop uninitialized hrtimer. Move the GRO flag updates to the XDP enable / disable paths, instead of mixing them with the ndo_open / ndo_close paths. Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Reported-by: Thomas Gleixner Reported-by: syzbot+039399a9b96297ddedca@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. 
Miller --- drivers/net/veth.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 578e36ea1589..a786be805709 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1208,14 +1208,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1235,18 +1227,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1654,6 +1637,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1669,6 +1660,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; 
peer->max_mtu = ETH_MAX_MTU; -- cgit v1.2.3 From dd61b55d733eee9bbe51abe7ab0e6f2ce1fae332 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 22 Feb 2024 10:54:49 -0600 Subject: RAS/AMD/ATL: Fix bit overflow in denorm_addr_df4_np2() The hash_pa8 and hashed_bit values in denorm_addr_df4_np2() are currently defined as u8 types. These variables represent single bits. 'hash_pa8' is set based on logical AND operations using masks with more than 8 bits. So the calculated value will not fit in this variable. It will always be '0'. The 'hash_pa8' check later in the function will fail which produces incorrect results for some cases. Change these variables to bool type. This clarifies that they are single bit values. Also, this allows the compiler to ensure they hold the proper results. Remove an unnecessary shift operation. [ bp: Remove the unnecessary brackets in the else-branch of the hash_pa8 assignment. ] Fixes: 3f3174996be6 ("RAS: Introduce AMD Address Translation Library") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240222165449.23582-1-yazen.ghannam@amd.com --- drivers/ras/amd/atl/denormalize.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index 49a900e066f1..e279224288d6 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -545,7 +545,7 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) unsigned int mod_value, shift_value; u16 mask = df_cfg.component_id_mask; u64 temp_addr_a, temp_addr_b; - u8 hash_pa8, hashed_bit; + bool hash_pa8, hashed_bit; switch (ctx->map.intlv_mode) { case DF4_NPS4_3CHAN_HASH: @@ -577,8 +577,7 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); } else { - hash_pa8 = (ctx->coh_st_fabric_id & 
df_cfg.socket_id_mask); - hash_pa8 >>= df_cfg.socket_id_shift; + hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; temp_addr_a = ctx->ret_addr; } -- cgit v1.2.3 From 86bf8cfda6d2a6720fa2e6e676c98f0882c9d3d7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 23 Feb 2024 16:03:33 +0100 Subject: drm/tegra: Remove existing framebuffer only if we support display Tegra DRM doesn't support display on Tegra234 and later, so make sure not to remove any existing framebuffers in that case. v2: - add comments explaining how this situation can come about - clear DRIVER_MODESET and DRIVER_ATOMIC feature bits Fixes: 6848c291a54f ("drm/aperture: Convert drivers to aperture interfaces") Signed-off-by: Thierry Reding Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240223150333.1401582-1-thierry.reding@gmail.com --- drivers/gpu/drm/tegra/drm.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb7..373bcd79257e 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1242,9 +1242,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. 
+ */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = drm_dev_register(drm, 0); if (err < 0) -- cgit v1.2.3 From e4e535036173addff38d6b295a231a553d1e0d3a Mon Sep 17 00:00:00 2001 From: Changhuang Liang Date: Sun, 25 Feb 2024 21:50:25 -0800 Subject: irqchip: Add StarFive external interrupt controller Add StarFive external interrupt controller for JH8100 SoC. Signed-off-by: Changhuang Liang Signed-off-by: Thomas Gleixner Reviewed-by: Ley Foon Tan Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20240226055025.1669223-3-changhuang.liang@starfivetech.com --- MAINTAINERS | 6 + drivers/irqchip/Kconfig | 11 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-starfive-jh8100-intc.c | 207 +++++++++++++++++++++++++++++ 4 files changed, 225 insertions(+) create mode 100644 drivers/irqchip/irq-starfive-jh8100-intc.c (limited to 'drivers') diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..ef678f04c830 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20956,6 +20956,12 @@ F: Documentation/devicetree/bindings/phy/starfive,jh7110-usb-phy.yaml F: drivers/phy/starfive/phy-jh7110-pcie.c F: drivers/phy/starfive/phy-jh7110-usb.c +STARFIVE JH8100 EXTERNAL INTERRUPT CONTROLLER DRIVER +M: Changhuang Liang +S: Supported +F: Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml +F: drivers/irqchip/irq-starfive-jh8100-intc.c + STATIC BRANCH/CALL M: Peter Zijlstra M: Josh Poimboeuf diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index f7149d0f3d45..72c07a12f5e1 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -546,6 +546,17 @@ config SIFIVE_PLIC select IRQ_DOMAIN_HIERARCHY select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP +config STARFIVE_JH8100_INTC + bool "StarFive 
JH8100 External Interrupt Controller" + depends on ARCH_STARFIVE || COMPILE_TEST + default ARCH_STARFIVE + select IRQ_DOMAIN_HIERARCHY + help + This enables support for the INTC chip found in StarFive JH8100 + SoC. + + If you don't know what to do here, say Y. + config EXYNOS_IRQ_COMBINER bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index ffd945fe71aa..ec4a18380998 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o +obj-$(CONFIG_STARFIVE_JH8100_INTC) += irq-starfive-jh8100-intc.o obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o obj-$(CONFIG_IMX_INTMUX) += irq-imx-intmux.o obj-$(CONFIG_IMX_MU_MSI) += irq-imx-mu-msi.o diff --git a/drivers/irqchip/irq-starfive-jh8100-intc.c b/drivers/irqchip/irq-starfive-jh8100-intc.c new file mode 100644 index 000000000000..0f5837176e53 --- /dev/null +++ b/drivers/irqchip/irq-starfive-jh8100-intc.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * StarFive JH8100 External Interrupt Controller driver + * + * Copyright (C) 2023 StarFive Technology Co., Ltd. 
+ * + * Author: Changhuang Liang <changhuang.liang@starfivetech.com> + */ + +#define pr_fmt(fmt) "irq-starfive-jh8100: " fmt + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/reset.h> +#include <linux/spinlock.h> + +#define STARFIVE_INTC_SRC0_CLEAR 0x10 +#define STARFIVE_INTC_SRC0_MASK 0x14 +#define STARFIVE_INTC_SRC0_INT 0x1c + +#define STARFIVE_INTC_SRC_IRQ_NUM 32 + +struct starfive_irq_chip { + void __iomem *base; + struct irq_domain *domain; + raw_spinlock_t lock; +}; + +static void starfive_intc_bit_set(struct starfive_irq_chip *irqc, + u32 reg, u32 bit_mask) +{ + u32 value; + + value = ioread32(irqc->base + reg); + value |= bit_mask; + iowrite32(value, irqc->base + reg); +} + +static void starfive_intc_bit_clear(struct starfive_irq_chip *irqc, + u32 reg, u32 bit_mask) +{ + u32 value; + + value = ioread32(irqc->base + reg); + value &= ~bit_mask; + iowrite32(value, irqc->base + reg); +} + +static void starfive_intc_unmask(struct irq_data *d) +{ + struct starfive_irq_chip *irqc = irq_data_get_irq_chip_data(d); + + raw_spin_lock(&irqc->lock); + starfive_intc_bit_clear(irqc, STARFIVE_INTC_SRC0_MASK, BIT(d->hwirq)); + raw_spin_unlock(&irqc->lock); +} + +static void starfive_intc_mask(struct irq_data *d) +{ + struct starfive_irq_chip *irqc = irq_data_get_irq_chip_data(d); + + raw_spin_lock(&irqc->lock); + starfive_intc_bit_set(irqc, STARFIVE_INTC_SRC0_MASK, BIT(d->hwirq)); + raw_spin_unlock(&irqc->lock); +} + +static struct irq_chip intc_dev = { + .name = "StarFive JH8100 INTC", + .irq_unmask = starfive_intc_unmask, + .irq_mask = starfive_intc_mask, +}; + +static int starfive_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_domain_set_info(d, irq, hwirq, &intc_dev, d->host_data, + handle_level_irq, NULL, NULL); + + return 0; +} + +static const struct irq_domain_ops starfive_intc_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = starfive_intc_map, +}; + +static void starfive_intc_irq_handler(struct irq_desc *desc) +{ + struct starfive_irq_chip
*irqc = irq_data_get_irq_handler_data(&desc->irq_data); + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned long value; + int hwirq; + + chained_irq_enter(chip, desc); + + value = ioread32(irqc->base + STARFIVE_INTC_SRC0_INT); + while (value) { + hwirq = ffs(value) - 1; + + generic_handle_domain_irq(irqc->domain, hwirq); + + starfive_intc_bit_set(irqc, STARFIVE_INTC_SRC0_CLEAR, BIT(hwirq)); + starfive_intc_bit_clear(irqc, STARFIVE_INTC_SRC0_CLEAR, BIT(hwirq)); + + __clear_bit(hwirq, &value); + } + + chained_irq_exit(chip, desc); +} + +static int __init starfive_intc_init(struct device_node *intc, + struct device_node *parent) +{ + struct starfive_irq_chip *irqc; + struct reset_control *rst; + struct clk *clk; + int parent_irq; + int ret; + + irqc = kzalloc(sizeof(*irqc), GFP_KERNEL); + if (!irqc) + return -ENOMEM; + + irqc->base = of_iomap(intc, 0); + if (!irqc->base) { + pr_err("Unable to map registers\n"); + ret = -ENXIO; + goto err_free; + } + + rst = of_reset_control_get_exclusive(intc, NULL); + if (IS_ERR(rst)) { + pr_err("Unable to get reset control %pe\n", rst); + ret = PTR_ERR(rst); + goto err_unmap; + } + + clk = of_clk_get(intc, 0); + if (IS_ERR(clk)) { + pr_err("Unable to get clock %pe\n", clk); + ret = PTR_ERR(clk); + goto err_reset_put; + } + + ret = reset_control_deassert(rst); + if (ret) + goto err_clk_put; + + ret = clk_prepare_enable(clk); + if (ret) + goto err_reset_assert; + + raw_spin_lock_init(&irqc->lock); + + irqc->domain = irq_domain_add_linear(intc, STARFIVE_INTC_SRC_IRQ_NUM, + &starfive_intc_domain_ops, irqc); + if (!irqc->domain) { + pr_err("Unable to create IRQ domain\n"); + ret = -EINVAL; + goto err_clk_disable; + } + + parent_irq = of_irq_get(intc, 0); + if (parent_irq < 0) { + pr_err("Failed to get main IRQ: %d\n", parent_irq); + ret = parent_irq; + goto err_remove_domain; + } + + irq_set_chained_handler_and_data(parent_irq, starfive_intc_irq_handler, + irqc); + + pr_info("Interrupt controller register, nr_irqs %d\n", + 
STARFIVE_INTC_SRC_IRQ_NUM); + + return 0; + +err_remove_domain: + irq_domain_remove(irqc->domain); +err_clk_disable: + clk_disable_unprepare(clk); +err_reset_assert: + reset_control_assert(rst); +err_clk_put: + clk_put(clk); +err_reset_put: + reset_control_put(rst); +err_unmap: + iounmap(irqc->base); +err_free: + kfree(irqc); + return ret; +} + +IRQCHIP_PLATFORM_DRIVER_BEGIN(starfive_intc) +IRQCHIP_MATCH("starfive,jh8100-intc", starfive_intc_init) +IRQCHIP_PLATFORM_DRIVER_END(starfive_intc) + +MODULE_DESCRIPTION("StarFive JH8100 External Interrupt Controller"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Changhuang Liang "); -- cgit v1.2.3 From 9d3f8a723c7950e56e0b95ab84b572caee29e065 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 21 Feb 2024 08:18:59 +0100 Subject: drm/ttm/tests: depend on UML || COMPILE_TEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least the device test requires that no other driver using TTM is loaded. So make those unit tests depend on UML || COMPILE_TEST to prevent people from trying them on bare metal. Signed-off-by: Christian König Acked-by: Alex Deucher Link: https://lore.kernel.org/all/20240219230116.77b8ad68@yea/ --- drivers/gpu/drm/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..c7edba18a6f0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -199,7 +199,7 @@ config DRM_TTM config DRM_TTM_KUNIT_TEST tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS default n - depends on DRM && KUNIT && MMU + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) select DRM_TTM select DRM_EXPORT_FOR_TESTS if m select DRM_KUNIT_TEST_HELPERS @@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST help Enables unit tests for TTM, a GPU memory manager subsystem used to manage memory buffers. This option is mostly useful for kernel - developers. + developers. 
It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. If in doubt, say "N". -- cgit v1.2.3 From 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Feb 2024 12:44:11 +0100 Subject: fbcon: always restore the old font data in fbcon_do_set_font() Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when vc_resize() failed) started restoring old font data upon failure (of vc_resize()). But it performs so only for user fonts. It means that the "system"/internal fonts are not restored at all. So in result, the very first call to fbcon_do_set_font() performs no restore at all upon failing vc_resize(). This can be reproduced by Syzkaller to crash the system on the next invocation of font_get(). It's rather hard to hit the allocation failure in vc_resize() on the first font_set(), but not impossible. Esp. if fault injection is used to aid the execution/failure. It was demonstrated by Sirius: BUG: unable to handle page fault for address: fffffffffffffff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 Call Trace: con_font_get drivers/tty/vt/vt.c:4558 [inline] con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 vfs_ioctl fs/ioctl.c:51 [inline] ... So restore the font data in any case, not only for user fonts. Note the later 'if' is now protected by 'old_userfont' and not 'old_data' as the latter is always set now. (And it is supposed to be non-NULL. 
Otherwise we would see the bug above again.) Signed-off-by: Jiri Slaby (SUSE) Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") Reported-and-tested-by: Ubisectech Sirius Cc: Ubisectech Sirius Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org --- drivers/video/fbdev/core/fbcon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1183e7a871f8..46823c2e2ba1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2399,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2437,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; -- cgit v1.2.3 From 2fe4ffc3ecdcb69d0e5aded5abf5367e3519bd04 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:36 +0800 Subject: md: merge the check of capabilities into md_ioctl_valid() There is no functional change. 
Just to make code cleaner. Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-2-linan666@huaweicloud.com --- drivers/md/md.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 75266c34b1f9..eedb9e343840 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7522,16 +7522,17 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static inline bool md_ioctl_valid(unsigned int cmd) +static inline int md_ioctl_valid(unsigned int cmd) { switch (cmd) { - case ADD_NEW_DISK: case GET_ARRAY_INFO: - case GET_BITMAP_FILE: case GET_DISK_INFO: + case RAID_VERSION: + return 0; + case ADD_NEW_DISK: + case GET_BITMAP_FILE: case HOT_ADD_DISK: case HOT_REMOVE_DISK: - case RAID_VERSION: case RESTART_ARRAY_RW: case RUN_ARRAY: case SET_ARRAY_INFO: @@ -7540,9 +7541,11 @@ static inline bool md_ioctl_valid(unsigned int cmd) case STOP_ARRAY: case STOP_ARRAY_RO: case CLUSTERED_DISK_NACK: - return true; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return 0; default: - return false; + return -ENOTTY; } } @@ -7602,18 +7605,9 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, struct mddev *mddev = NULL; bool did_set_md_closing = false; - if (!md_ioctl_valid(cmd)) - return -ENOTTY; - - switch (cmd) { - case RAID_VERSION: - case GET_ARRAY_INFO: - case GET_DISK_INFO: - break; - default: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - } + err = md_ioctl_valid(cmd); + if (err) + return err; /* * Commands dealing with the RAID driver but not any -- cgit v1.2.3 From 4e26593944e02446a75d911e11b759a9320c8273 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:37 +0800 Subject: md: changed the switch of RAID_VERSION to if There is only one case of this 'switch'. Change it to 'if'. 
Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-3-linan666@huaweicloud.com --- drivers/md/md.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index eedb9e343840..3c8a0784cf6a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7613,12 +7613,8 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, * Commands dealing with the RAID driver but not any * particular array: */ - switch (cmd) { - case RAID_VERSION: - err = get_version(argp); - goto out; - default:; - } + if (cmd == RAID_VERSION) + return get_version(argp); /* * Commands creating/starting a new array: -- cgit v1.2.3 From 9dd8702e7cd28ebf076ff838933f29cf671165ec Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:38 +0800 Subject: md: clean up invalid BUG_ON in md_ioctl 'disk->private_data' is set to mddev in md_alloc() and never set to NULL, and users need to open mddev before submitting ioctl. So mddev must not have been freed during ioctl, and there is no need to check mddev here. Clean it up.
Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-4-linan666@huaweicloud.com --- drivers/md/md.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3c8a0784cf6a..08170902d342 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7622,11 +7622,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, mddev = bdev->bd_disk->private_data; - if (!mddev) { - BUG(); - goto out; - } - /* Some actions do not requires the mutex */ switch (cmd) { case GET_ARRAY_INFO: -- cgit v1.2.3 From 91b26a39fb83cf370d94980609bf649c3c46993c Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:39 +0800 Subject: md: return directly before setting did_set_md_closing There is nothing to do at 'out' before setting 'did_set_md_closing' in md_ioctl(). Return directly, and it will help us to remove 'did_set_md_closing' later. Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-5-linan666@huaweicloud.com --- drivers/md/md.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 08170902d342..67e7660191bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7626,26 +7626,19 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, switch (cmd) { case GET_ARRAY_INFO: if (!mddev->raid_disks && !mddev->external) - err = -ENODEV; - else - err = get_array_info(mddev, argp); - goto out; + return -ENODEV; + return get_array_info(mddev, argp); case GET_DISK_INFO: if (!mddev->raid_disks && !mddev->external) - err = -ENODEV; - else - err = get_disk_info(mddev, argp); - goto out; + return -ENODEV; + return get_disk_info(mddev, argp); case SET_DISK_FAULTY: - err = set_disk_faulty(mddev, new_decode_dev(arg)); - goto out; + return set_disk_faulty(mddev, new_decode_dev(arg)); case 
GET_BITMAP_FILE: - err = get_bitmap_file(mddev, argp); - goto out; - + return get_bitmap_file(mddev, argp); } if (cmd == HOT_REMOVE_DISK) @@ -7661,13 +7654,11 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, mutex_lock(&mddev->open_mutex); if (mddev->pers && atomic_read(&mddev->openers) > 1) { mutex_unlock(&mddev->open_mutex); - err = -EBUSY; - goto out; + return -EBUSY; } if (test_and_set_bit(MD_CLOSING, &mddev->flags)) { mutex_unlock(&mddev->open_mutex); - err = -EBUSY; - goto out; + return -EBUSY; } did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); -- cgit v1.2.3 From 9674f54e41fffaf06f6a60202e1fa4cc13de3cf5 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:40 +0800 Subject: md: Don't clear MD_CLOSING when the raid is about to stop The raid should not be opened anymore when it is about to be stopped. However, other processes can open it again if the flag MD_CLOSING is cleared before exiting. From now on, this flag will not be cleared when the raid will be stopped. Fixes: 065e519e71b2 ("md: MD_CLOSING needs to be cleared after called md_set_readonly or do_md_stop") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-6-linan666@huaweicloud.com --- drivers/md/md.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 67e7660191bd..9f97e4041425 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6265,7 +6265,15 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - mddev->flags = 0; + /* + * Don't clear MD_CLOSING, or mddev can be opened again. + * 'hold_active != 0' means mddev is still in the creation + * process and will be used later. 
+ */ + if (mddev->hold_active) + mddev->flags = 0; + else + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -7603,7 +7611,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - bool did_set_md_closing = false; err = md_ioctl_valid(cmd); if (err) @@ -7660,7 +7667,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, mutex_unlock(&mddev->open_mutex); return -EBUSY; } - did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7802,7 +7808,7 @@ unlock: mddev_unlock(mddev); out: - if(did_set_md_closing) + if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) clear_bit(MD_CLOSING, &mddev->flags); return err; } -- cgit v1.2.3 From f74aaf614e84d8e5767a062e1172b4907c7b775e Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:41 +0800 Subject: md: factor out a helper to sync mddev There are no functional changes, prepare to sync mddev in array_state_store(). 
Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-7-linan666@huaweicloud.com --- drivers/md/md.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9f97e4041425..4d58e3496d16 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -529,6 +529,24 @@ void mddev_resume(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_resume); +/* sync bdev before setting device to readonly or stopping raid*/ +static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_num) +{ + mutex_lock(&mddev->open_mutex); + if (mddev->pers && atomic_read(&mddev->openers) > opener_num) { + mutex_unlock(&mddev->open_mutex); + return -EBUSY; + } + if (test_and_set_bit(MD_CLOSING, &mddev->flags)) { + mutex_unlock(&mddev->open_mutex); + return -EBUSY; + } + mutex_unlock(&mddev->open_mutex); + + sync_blockdev(mddev->gendisk->part0); + return 0; +} + /* * Generic flush handling for md */ @@ -7658,17 +7676,9 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, /* Need to flush page cache, and ensure no-one else opens * and writes */ - mutex_lock(&mddev->open_mutex); - if (mddev->pers && atomic_read(&mddev->openers) > 1) { - mutex_unlock(&mddev->open_mutex); - return -EBUSY; - } - if (test_and_set_bit(MD_CLOSING, &mddev->flags)) { - mutex_unlock(&mddev->open_mutex); - return -EBUSY; - } - mutex_unlock(&mddev->open_mutex); - sync_blockdev(bdev); + err = mddev_set_closing_and_sync_blockdev(mddev, 1); + if (err) + return err; } if (!md_is_rdwr(mddev)) -- cgit v1.2.3 From 99b902ac17253ee65d23012e2be390c026b77fa4 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:42 +0800 Subject: md: sync blockdev before stopping raid or setting readonly Commit a05b7ea03d72 ("md: avoid crash when stopping md array races with closing other open fds.") added sync_block before stopping raid and setting readonly. 
Later in commit 260fa034ef7a ("md: avoid deadlock when dirty buffers during md_stop.") it is moved to ioctl. array_state_store() was ignored. Add sync blockdev to array_state_store() now. Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-8-linan666@huaweicloud.com --- drivers/md/md.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 4d58e3496d16..3b653e68db0c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4500,6 +4500,17 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) case broken: /* cannot be set */ case bad_word: return -EINVAL; + case clear: + case readonly: + case inactive: + case read_auto: + if (!mddev->pers || !md_is_rdwr(mddev)) + break; + /* write sysfs will not open mddev and opener should be 0 */ + err = mddev_set_closing_and_sync_blockdev(mddev, 0); + if (err) + return err; + break; default: break; } @@ -4599,6 +4610,11 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) sysfs_notify_dirent_safe(mddev->sysfs_state); } mddev_unlock(mddev); + + if (st == readonly || st == read_auto || st == inactive || + (err && st == clear)) + clear_bit(MD_CLOSING, &mddev->flags); + return err ?: len; } static struct md_sysfs_entry md_array_state = -- cgit v1.2.3 From 650b2e69ff6ab4de8d895e933f2b6fbacb1f8411 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:43 +0800 Subject: md: clean up openers check in do_md_stop() and md_set_readonly() Before stopping or setting readonly, mddev_set_closing_and_sync_blockdev() is always called to check the openers. So there is no longer any need to check it again in do_md_stop() and md_set_readonly(). Clean it up.
Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-9-linan666@huaweicloud.com --- drivers/md/md.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3b653e68db0c..9b61e4cf796b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4482,8 +4482,8 @@ array_state_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", array_states[st]); } -static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev); -static int md_set_readonly(struct mddev *mddev, struct block_device *bdev); +static int do_md_stop(struct mddev *mddev, int ro); +static int md_set_readonly(struct mddev *mddev); static int restart_array(struct mddev *mddev); static ssize_t @@ -4544,14 +4544,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) case inactive: /* stop an active array, return 0 otherwise */ if (mddev->pers) - err = do_md_stop(mddev, 2, NULL); + err = do_md_stop(mddev, 2); break; case clear: - err = do_md_stop(mddev, 0, NULL); + err = do_md_stop(mddev, 0); break; case readonly: if (mddev->pers) - err = md_set_readonly(mddev, NULL); + err = md_set_readonly(mddev); else { mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); @@ -4561,7 +4561,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) case read_auto: if (mddev->pers) { if (md_is_rdwr(mddev)) - err = md_set_readonly(mddev, NULL); + err = md_set_readonly(mddev); else if (mddev->ro == MD_RDONLY) err = restart_array(mddev); if (err == 0) { @@ -6420,7 +6420,7 @@ void md_stop(struct mddev *mddev) EXPORT_SYMBOL_GPL(md_stop); -static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) +static int md_set_readonly(struct mddev *mddev) { int err = 0; int did_freeze = 0; @@ -6438,9 +6438,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) !test_bit(MD_SB_CHANGE_PENDING, 
&mddev->sb_flags)); mddev_lock_nointr(mddev); - mutex_lock(&mddev->open_mutex); - if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); err = -EBUSY; goto out; @@ -6465,7 +6463,6 @@ out: sysfs_notify_dirent_safe(mddev->sysfs_state); } - mutex_unlock(&mddev->open_mutex); return err; } @@ -6473,8 +6470,7 @@ out: * 0 - completely stop and dis-assemble array * 2 - stop but do not disassemble array */ -static int do_md_stop(struct mddev *mddev, int mode, - struct block_device *bdev) +static int do_md_stop(struct mddev *mddev, int mode) { struct gendisk *disk = mddev->gendisk; struct md_rdev *rdev; @@ -6487,12 +6483,9 @@ static int do_md_stop(struct mddev *mddev, int mode, stop_sync_thread(mddev, true, false); - mutex_lock(&mddev->open_mutex); - if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || - mddev->sysfs_active || + if (mddev->sysfs_active || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); - mutex_unlock(&mddev->open_mutex); if (did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -6514,13 +6507,11 @@ static int do_md_stop(struct mddev *mddev, int mode, sysfs_unlink_rdev(mddev, rdev); set_capacity_and_notify(disk, 0); - mutex_unlock(&mddev->open_mutex); mddev->changed = 1; if (!md_is_rdwr(mddev)) mddev->ro = MD_RDWR; - } else - mutex_unlock(&mddev->open_mutex); + } /* * Free resources if final stop */ @@ -6566,7 +6557,7 @@ static void autorun_array(struct mddev *mddev) err = do_md_run(mddev); if (err) { pr_warn("md: do_md_run() returned %d\n", err); - do_md_stop(mddev, 0, NULL); + do_md_stop(mddev, 0); } } @@ -7735,11 +7726,11 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, goto unlock; case STOP_ARRAY: - err = do_md_stop(mddev, 0, bdev); + err = 
do_md_stop(mddev, 0); goto unlock; case STOP_ARRAY_RO: - err = md_set_readonly(mddev, bdev); + err = md_set_readonly(mddev); goto unlock; case HOT_REMOVE_DISK: -- cgit v1.2.3 From e9b0a1556ca2e18d67fa6452b2b99aa66b60ba6e Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 26 Feb 2024 11:14:44 +0800 Subject: md: check mddev->pers before calling md_set_readonly() If 'mddev->pers' is NULL, there is nothing to do in md_set_readonly(). Except for md_ioctl(), the other two callers of md_set_readonly() have already checked 'mddev->pers'. To simplify the code, move the check of 'mddev->pers' to the caller. Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240226031444.3606764-10-linan666@huaweicloud.com --- drivers/md/md.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9b61e4cf796b..48ae2b1cb57a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6420,6 +6420,7 @@ void md_stop(struct mddev *mddev) EXPORT_SYMBOL_GPL(md_stop); +/* ensure 'mddev->pers' exist before calling md_set_readonly() */ static int md_set_readonly(struct mddev *mddev) { int err = 0; @@ -6444,20 +6445,18 @@ static int md_set_readonly(struct mddev *mddev) goto out; } - if (mddev->pers) { - __md_stop_writes(mddev); - - if (mddev->ro == MD_RDONLY) { - err = -ENXIO; - goto out; - } + __md_stop_writes(mddev); - mddev->ro = MD_RDONLY; - set_disk_ro(mddev->gendisk, 1); + if (mddev->ro == MD_RDONLY) { + err = -ENXIO; + goto out; } + mddev->ro = MD_RDONLY; + set_disk_ro(mddev->gendisk, 1); + out: - if ((mddev->pers && !err) || did_freeze) { + if (!err || did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -7730,7 +7729,8 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, goto unlock; case STOP_ARRAY_RO: - err = md_set_readonly(mddev); + if (mddev->pers) + 
err = md_set_readonly(mddev); goto unlock; case HOT_REMOVE_DISK: -- cgit v1.2.3 From aeb004c0cd6958e910123a1607634401009c9539 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:45 -0800 Subject: iommufd: Fix iopt_access_list_id overwrite bug Syzkaller reported the following WARN_ON: WARNING: CPU: 1 PID: 4738 at drivers/iommu/iommufd/io_pagetable.c:1360 Call Trace: iommufd_access_change_ioas+0x2fe/0x4e0 iommufd_access_destroy_object+0x50/0xb0 iommufd_object_remove+0x2a3/0x490 iommufd_object_destroy_user iommufd_access_destroy+0x71/0xb0 iommufd_test_staccess_release+0x89/0xd0 __fput+0x272/0xb50 __fput_sync+0x4b/0x60 __do_sys_close __se_sys_close __x64_sys_close+0x8b/0x110 do_syscall_x64 The mismatch between the access pointer in the list and the passed-in pointer is resulting from an overwrite of access->iopt_access_list_id, in iopt_add_access(). Called from iommufd_access_change_ioas() when xa_alloc() succeeds but iopt_calculate_iova_alignment() fails. Add a new_id in iopt_add_access() and only update iopt_access_list_id when returning successfully. 
Cc: stable@vger.kernel.org Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers") Link: https://lore.kernel.org/r/2dda7acb25b8562ec5f1310de828ef5da9ef509c.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 504ac1b01b2d..05fd9d3abf1b 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1330,20 +1330,23 @@ out_unlock: int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) { + u32 new_id; int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); - rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, - xa_limit_16b, GFP_KERNEL_ACCOUNT); + rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b, + GFP_KERNEL_ACCOUNT); + if (rc) goto out_unlock; rc = iopt_calculate_iova_alignment(iopt); if (rc) { - xa_erase(&iopt->access_list, access->iopt_access_list_id); + xa_erase(&iopt->access_list, new_id); goto out_unlock; } + access->iopt_access_list_id = new_id; out_unlock: up_write(&iopt->iova_rwsem); -- cgit v1.2.3 From fde372df96afddcda3ec94944351f2a14f7cd98d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:46 -0800 Subject: iommufd/selftest: Fix mock_dev_num bug Syzkaller reported the following bug: sysfs: cannot create duplicate filename '/devices/iommufd_mock4' Call Trace: sysfs_warn_dup+0x71/0x90 sysfs_create_dir_ns+0x1ee/0x260 ? sysfs_create_mount_point+0x80/0x80 ? spin_bug+0x1d0/0x1d0 ? do_raw_spin_unlock+0x54/0x220 kobject_add_internal+0x221/0x970 kobject_add+0x11c/0x1e0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 ? kset_create_and_add+0x160/0x160 ? kobject_put+0x5d/0x390 ? 
bus_get_dev_root+0x4a/0x60 ? kobject_put+0x5d/0x390 device_add+0x1d5/0x1550 ? __fw_devlink_link_to_consumers.isra.0+0x1f0/0x1f0 ? __init_waitqueue_head+0xcb/0x150 iommufd_test+0x462/0x3b60 ? lock_release+0x1fe/0x640 ? __might_fault+0x117/0x170 ? reacquire_held_locks+0x4b0/0x4b0 ? iommufd_selftest_destroy+0xd0/0xd0 ? __might_fault+0xbe/0x170 iommufd_fops_ioctl+0x256/0x350 ? iommufd_option+0x180/0x180 ? __lock_acquire+0x1755/0x45f0 __x64_sys_ioctl+0xa13/0x1640 The bug is triggered when Syzkaller created multiple mock devices but didn't destroy them in the same sequence, messing up the mock_dev_num counter. Replace the atomic with a mock_dev_ida. Cc: stable@vger.kernel.org Fixes: 23a1b46f15d5 ("iommufd/selftest: Make the mock iommu driver into a real driver") Link: https://lore.kernel.org/r/5af41d5af6d5c013cc51de01427abb8141b3587e.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 8abf9747773e..2bfe77bd351d 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -36,7 +36,7 @@ static struct mock_bus_type iommufd_mock_bus_type = { }, }; -static atomic_t mock_dev_num; +static DEFINE_IDA(mock_dev_ida); enum { MOCK_DIRTY_TRACK = 1, @@ -123,6 +123,7 @@ enum selftest_obj_type { struct mock_dev { struct device dev; unsigned long flags; + int id; }; struct selftest_obj { @@ -631,7 +632,7 @@ static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); - atomic_dec(&mock_dev_num); + ida_free(&mock_dev_ida, mdev->id); kfree(mdev); } @@ -653,8 +654,12 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) mdev->dev.release = mock_dev_release; mdev->dev.bus = 
&iommufd_mock_bus_type.bus; - rc = dev_set_name(&mdev->dev, "iommufd_mock%u", - atomic_inc_return(&mock_dev_num)); + rc = ida_alloc(&mock_dev_ida, GFP_KERNEL); + if (rc < 0) + goto err_put; + mdev->id = rc; + + rc = dev_set_name(&mdev->dev, "iommufd_mock%u", mdev->id); if (rc) goto err_put; -- cgit v1.2.3 From cf7c2789822db8b5efa34f5ebcf1621bc0008d48 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:47 -0800 Subject: iommufd: Fix protection fault in iommufd_test_syz_conv_iova Syzkaller reported the following bug: general protection fault, probably for non-canonical address 0xdffffc0000000038: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x00000000000001c0-0x00000000000001c7] Call Trace: lock_acquire lock_acquire+0x1ce/0x4f0 down_read+0x93/0x4a0 iommufd_test_syz_conv_iova+0x56/0x1f0 iommufd_test_access_rw.isra.0+0x2ec/0x390 iommufd_test+0x1058/0x1e30 iommufd_fops_ioctl+0x381/0x510 vfs_ioctl __do_sys_ioctl __se_sys_ioctl __x64_sys_ioctl+0x170/0x1e0 do_syscall_x64 do_syscall_64+0x71/0x140 This is because the new iommufd_access_change_ioas() sets access->ioas to NULL during its process, so the lock might be gone in a concurrent racing context. Fix this by doing the same access->ioas sanity as iommufd_access_rw() and iommufd_access_pin_pages() functions do. 
Cc: stable@vger.kernel.org Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers") Link: https://lore.kernel.org/r/3f1932acaf1dd494d404c04364d73ce8f57f3e5e.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 2bfe77bd351d..d59e199a8705 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -63,8 +63,8 @@ enum { * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset * value. This has a much smaller randomization space and syzkaller can hit it. */ -static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, - u64 *iova) +static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt, + u64 *iova) { struct syz_layout { __u32 nth_area; @@ -88,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, return 0; } +static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access, + u64 *iova) +{ + unsigned long ret; + + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return 0; + } + ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova); + mutex_unlock(&access->ioas_lock); + return ret; +} + void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags) { @@ -100,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, ioas = iommufd_get_ioas(ucmd->ictx, ioas_id); if (IS_ERR(ioas)) return; - *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); + *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova); iommufd_put_object(ucmd->ictx, &ioas->obj); } @@ -1161,7 +1176,7 @@ static int 
iommufd_test_access_pages(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + iova = iommufd_test_syz_conv_iova(staccess->access, &cmd->access_pages.iova); npages = (ALIGN(iova + length, PAGE_SIZE) - @@ -1263,8 +1278,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, - &cmd->access_rw.iova); + iova = iommufd_test_syz_conv_iova(staccess->access, + &cmd->access_rw.iova); rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); if (rc) -- cgit v1.2.3 From bb04d13353885f81c87879b2deb296bd2adb6cab Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 23 Feb 2024 14:44:08 -0400 Subject: iommufd/selftest: Don't check map/unmap pairing with HUGE_PAGES Since MOCK_HUGE_PAGE_SIZE was introduced it allows the core code to invoke mock with large page sizes. This confuses the validation logic that checks that map/unmap are paired. This is because the page size computed for map is based on the physical address and in many cases will always be the base page size, however the entire range generated by iommufd will be passed to map. Randomly iommufd can see small groups of physically contiguous pages, (say 8k unaligned and grouped together), but that group crosses a huge page boundary. The map side will observe this as a contiguous run and mark it accordingly, but there is a chance the unmap side will end up terminating interior huge pages in the middle of that group and trigger a validation failure. Meaning the validation only works if the core code passes the iova/length directly from iommufd to mock. 
syzkaller randomly hits this with failures like: WARNING: CPU: 0 PID: 11568 at drivers/iommu/iommufd/selftest.c:461 mock_domain_unmap_pages+0x1c0/0x250 Modules linked in: CPU: 0 PID: 11568 Comm: syz-executor.0 Not tainted 6.8.0-rc3+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:mock_domain_unmap_pages+0x1c0/0x250 Code: 2b e8 94 37 0f ff 48 d1 eb 31 ff 48 b8 00 00 00 00 00 00 20 00 48 21 c3 48 89 de e8 aa 32 0f ff 48 85 db 75 07 e8 70 37 0f ff <0f> 0b e8 69 37 0f ff 31 f6 31 ff e8 90 32 0f ff e8 5b 37 0f ff 4c RSP: 0018:ffff88800e707490 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff822dfae6 RDX: ffff88800cf86400 RSI: ffffffff822dfaf0 RDI: 0000000000000007 RBP: ffff88800e7074d8 R08: 0000000000000000 R09: ffffed1001167c90 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000001500000 R13: 0000000000083000 R14: 0000000000000001 R15: 0000000000000800 FS: 0000555556048480(0000) GS:ffff88806d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2dc23000 CR3: 0000000008cbb000 CR4: 0000000000350eb0 Call Trace: __iommu_unmap+0x281/0x520 iommu_unmap+0xc9/0x180 iopt_area_unmap_domain_range+0x1b1/0x290 iopt_area_unpin_domain+0x590/0x800 __iopt_area_unfill_domain+0x22e/0x650 iopt_area_unfill_domain+0x47/0x60 iopt_unfill_domain+0x187/0x590 iopt_table_remove_domain+0x267/0x2d0 iommufd_hwpt_paging_destroy+0x1f1/0x370 iommufd_object_remove+0x2a3/0x490 iommufd_device_detach+0x23a/0x2c0 iommufd_selftest_destroy+0x7a/0xf0 iommufd_fops_release+0x1d3/0x340 __fput+0x272/0xb50 __fput_sync+0x4b/0x60 __x64_sys_close+0x8b/0x110 do_syscall_64+0x71/0x140 entry_SYSCALL_64_after_hwframe+0x46/0x4e Do the simple thing and just disable the validation when the huge page tests are being run. 
Fixes: 7db521e23fe9 ("iommufd/selftest: Hugepage mock domain support") Link: https://lore.kernel.org/r/0-v1-1e17e60a5c8a+103fb-iommufd_mock_hugepg_jgg@nvidia.com Reviewed-by: Joao Martins Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d59e199a8705..7a2199470f31 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -446,20 +446,27 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, /* * iommufd generates unmaps that must be a strict - * superset of the map's performend So every starting - * IOVA should have been an iova passed to map, and the + * superset of the map's performend So every + * starting/ending IOVA should have been an iova passed + * to map. * - * First IOVA must be present and have been a first IOVA - * passed to map_pages + * This simple logic doesn't work when the HUGE_PAGE is + * turned on since the core code will automatically + * switch between the two page sizes creating a break in + * the unmap calls. The break can land in the middle of + * contiguous IOVA. 
*/ - if (first) { - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_START_IOVA)); - first = false; + if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) { + if (first) { + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); + first = false; + } + if (pgcount == 1 && + cur + MOCK_IO_PAGE_SIZE == pgsize) + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); } - if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_LAST_IOVA)); iova += MOCK_IO_PAGE_SIZE; ret += MOCK_IO_PAGE_SIZE; -- cgit v1.2.3 From ecbd8ebb51bf7e4939d83b9e6022a55cac44ef06 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Fri, 23 Feb 2024 20:11:28 +0800 Subject: md/md-bitmap: fix incorrect usage for sb_index Commit d7038f951828 ("md-bitmap: don't use ->index for pages backing the bitmap file") removed page->index from bitmap code, but left wrong code logic for clustered-md. The current code never sets the slot offset for cluster nodes, which will sometimes cause a crash in a clustered environment. 
Call trace (partly): md_bitmap_file_set_bit+0x110/0x1d8 [md_mod] md_bitmap_startwrite+0x13c/0x240 [md_mod] raid1_make_request+0x6b0/0x1c08 [raid1] md_handle_request+0x1dc/0x368 [md_mod] md_submit_bio+0x80/0xf8 [md_mod] __submit_bio+0x178/0x300 submit_bio_noacct_nocheck+0x11c/0x338 submit_bio_noacct+0x134/0x614 submit_bio+0x28/0xdc submit_bh_wbc+0x130/0x1cc submit_bh+0x1c/0x28 Fixes: d7038f951828 ("md-bitmap: don't use ->index for pages backing the bitmap file") Cc: stable@vger.kernel.org # v6.6+ Signed-off-by: Heming Zhao Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240223121128.28985-1-heming.zhao@suse.com --- drivers/md/md-bitmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 9672f75c3050..a4976ceae868 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -234,7 +234,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, sector_t doff; bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; - if (pg_index == store->file_pages - 1) { + /* we compare length (page numbers), not page offset. 
*/ + if ((pg_index - store->sb_index) == store->file_pages - 1) { unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); if (last_page_size == 0) @@ -438,8 +439,8 @@ static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, struct page *page = store->filemap[pg_index]; if (mddev_is_clustered(bitmap->mddev)) { - pg_index += bitmap->cluster_slot * - DIV_ROUND_UP(store->bytes, PAGE_SIZE); + /* go to node bitmap area starting point */ + pg_index += store->sb_index; } if (store->file) @@ -952,6 +953,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) unsigned long index = file_page_index(store, chunk); unsigned long node_offset = 0; + index += store->sb_index; if (mddev_is_clustered(bitmap->mddev)) node_offset = bitmap->cluster_slot * store->file_pages; @@ -982,6 +984,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) unsigned long index = file_page_index(store, chunk); unsigned long node_offset = 0; + index += store->sb_index; if (mddev_is_clustered(bitmap->mddev)) node_offset = bitmap->cluster_slot * store->file_pages; -- cgit v1.2.3 From 5cc2da0b60e5b4daf6cf7442ee66f1f91878c0b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jan 2024 14:07:36 +0100 Subject: scsi: mpi3mr: Reduce stack usage in mpi3mr_refresh_sas_ports() Doubling the number of PHYs also doubled the stack usage of this function, exceeding the 32-bit limit of 1024 bytes: drivers/scsi/mpi3mr/mpi3mr_transport.c: In function 'mpi3mr_refresh_sas_ports': drivers/scsi/mpi3mr/mpi3mr_transport.c:1818:1: error: the frame size of 1636 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Since the sas_io_unit_pg0 structure is already allocated dynamically, use the same method here. The size of the allocation can be smaller based on the actual number of phys now, so use this as an upper bound. 
Fixes: cb5b60894602 ("scsi: mpi3mr: Increase maximum number of PHYs to 64 from 32") Reviewed-by: Johannes Thumshirn Cc: Sathya Prakash Veerichetty Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240123130754.2011469-1-arnd@kernel.org Tested-by: John Garry #build only Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index c0c8ab586957..d32ad46318cb 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -1671,7 +1671,7 @@ mpi3mr_update_mr_sas_port(struct mpi3mr_ioc *mrioc, struct host_port *h_port, void mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) { - struct host_port h_port[64]; + struct host_port *h_port = NULL; int i, j, found, host_port_count = 0, port_idx; u16 sz, attached_handle, ioc_status; struct mpi3_sas_io_unit_page0 *sas_io_unit_pg0 = NULL; @@ -1685,6 +1685,10 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) sas_io_unit_pg0 = kzalloc(sz, GFP_KERNEL); if (!sas_io_unit_pg0) return; + h_port = kcalloc(64, sizeof(struct host_port), GFP_KERNEL); + if (!h_port) + goto out; + if (mpi3mr_cfg_get_sas_io_unit_pg0(mrioc, sas_io_unit_pg0, sz)) { ioc_err(mrioc, "failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); @@ -1814,6 +1818,7 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) } } out: + kfree(h_port); kfree(sas_io_unit_pg0); } -- cgit v1.2.3 From ee0017c3ed8a8abfa4d40e42f908fb38c31e7515 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 21 Feb 2024 12:47:24 +0530 Subject: scsi: mpt3sas: Prevent sending diag_reset when the controller is ready If the driver detects that the controller is not ready before sending the first IOC facts command, it will wait for a maximum of 10 seconds for it to become ready. 
However, even if the controller becomes ready within 10 seconds, the driver will still issue a diagnostic reset. Modify the driver to avoid sending a diag reset if the controller becomes ready within the 10-second wait time. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240221071724.14986-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 8761bc58d965..b8120ca93c79 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7378,7 +7378,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } -- cgit v1.2.3 From 0e67899abfbfdea0c3c0ed3fd263ffc601c5c157 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 22 Feb 2024 13:38:38 +0100 Subject: lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected Same as LAN7800, LAN7850 can be used without EEPROM. If EEPROM is not present or not flashed, LAN7850 will fail to sync the speed detected by the PHY with the MAC. In case link speed is 100Mbit, it will accidentally work, otherwise no data can be transferred. A better way would be to implement a link_up callback, or to set auto speed configuration unconditionally. But these changes would be more intrusive. So, for now, set it only if no EEPROM is found. 
Fixes: e69647a19c87 ("lan78xx: Set ASD in MAC_CR when EEE is enabled.") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240222123839.2816561-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a6d653ff552a..e06193e831a5 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3033,7 +3033,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ -- cgit v1.2.3 From 0d60d8df6f493bb46bf5db40d39dd60a1bafdd4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Feb 2024 12:32:08 +0000 Subject: dpll: rely on rcu for netdev_dpll_pin() This fixes a possible UAF in if_nlmsg_size(), which can run without RTNL. Add rcu protection to "struct dpll_pin" Move netdev_dpll_pin() from netdevice.h to dpll.h to decrease name pollution. Note: This looks possible to no longer acquire RTNL in netdev_dpll_pin_assign() later in net-next. 
v2: do not force rcu_read_lock() in rtnl_dpll_pin_size() (Jiri Pirko) Fixes: 5f1842692880 ("netdev: expose DPLL pin handle for netdevice") Signed-off-by: Eric Dumazet Cc: Arkadiusz Kubalewski Cc: Vadim Fedorenko Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240223123208.3543319-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 2 +- drivers/dpll/dpll_core.h | 2 ++ include/linux/dpll.h | 11 +++++++++++ include/linux/netdevice.h | 11 +---------- net/core/dev.c | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 5152bd1b0daf..4c2bb27c99fe 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -564,7 +564,7 @@ void dpll_pin_put(struct dpll_pin *pin) xa_destroy(&pin->parent_refs); xa_erase(&dpll_pin_xa, pin->id); dpll_pin_prop_free(&pin->prop); - kfree(pin); + kfree_rcu(pin, rcu); } mutex_unlock(&dpll_lock); } diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h index 717f715015c7..2b6d8ef1cdf3 100644 --- a/drivers/dpll/dpll_core.h +++ b/drivers/dpll/dpll_core.h @@ -47,6 +47,7 @@ struct dpll_device { * @prop: pin properties copied from the registerer * @rclk_dev_name: holds name of device when pin can recover clock from it * @refcount: refcount + * @rcu: rcu_head for kfree_rcu() **/ struct dpll_pin { u32 id; @@ -57,6 +58,7 @@ struct dpll_pin { struct xarray parent_refs; struct dpll_pin_properties prop; refcount_t refcount; + struct rcu_head rcu; }; /** diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 9cf896ea1d41..4ec2fe9caf5a 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include struct dpll_device; struct dpll_pin; @@ -167,4 +169,13 @@ int dpll_device_change_ntf(struct dpll_device *dpll); int dpll_pin_change_ntf(struct dpll_pin *pin); +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ 
+#if IS_ENABLED(CONFIG_DPLL) + return rcu_dereference_rtnl(dev->dpll_pin); +#else + return NULL; +#endif +} + #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef7bfbb98497..a9c973b92294 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2469,7 +2469,7 @@ struct net_device { struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) - struct dpll_pin *dpll_pin; + struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ @@ -4035,15 +4035,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); void netdev_dpll_pin_clear(struct net_device *dev); -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_DPLL) - return dev->dpll_pin; -#else - return NULL; -#endif -} - struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/net/core/dev.c b/net/core/dev.c index 73a021973007..0230391c78f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9078,7 +9078,7 @@ static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll { #if IS_ENABLED(CONFIG_DPLL) rtnl_lock(); - dev->dpll_pin = dpll_pin; + rcu_assign_pointer(dev->dpll_pin, dpll_pin); rtnl_unlock(); #endif } -- cgit v1.2.3 From e567857cb41c4c4f5bb33fd0ff3c282c5c3c4577 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 26 Feb 2024 12:00:48 +0530 Subject: drm/i915/hdcp: Extract hdcp structure from correct connector Currently intel_hdcp is not being extracted from primary connector this patch fixes that. 
Fixes: 524240b231ea ("drm/i915/hdcp: Propagate aux info in DP HDCP functions") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Signed-off-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240226063051.1685326-3-suraj.kandpal@intel.com (cherry picked from commit 909fff3e46c08eb6fcbb52e7a49dfb359007ae79) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index a2c075b76728..8538d1ce2fcb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -389,7 +389,9 @@ intel_dp_hdcp2_wait_for_msg(struct intel_connector *connector, const struct hdcp2_dp_msg_data *hdcp2_msg_data) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; u8 msg_id = hdcp2_msg_data->msg_id; int ret, timeout; bool msg_ready = false; @@ -505,8 +507,9 @@ int intel_dp_hdcp2_read_msg(struct intel_connector *connector, { struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_hdcp *hdcp = &connector->hdcp; struct drm_dp_aux *aux = &dig_port->dp.aux; + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; unsigned int offset; u8 *byte = buf; ssize_t ret, bytes_to_recv, len; -- cgit v1.2.3 From a36b0787f074d7441f66c172745653570e09c320 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Feb 2024 14:27:40 -0800 Subject: ionic: check before releasing pci regions AER recovery handler can trigger a PCI Reset after tearing down the device setup 
in the error detection handler. The PCI Reset handler will also attempt to tear down the device setup, and this second tear down needs to know that it doesn't need to call pci_release_regions() a second time. We can clear num_bars on tear down and use that to decide later if we need to clear the resources. This prevents a harmless but disturbing warning message resource: Trying to free nonexistent resource <0xXXXXXXXXXX-0xXXXXXXXXXX> Fixes: c3a910e1c47a ("ionic: fill out pci error handlers") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 10a9d80db32c..6ba8d4aca0a0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -93,6 +93,7 @@ static void ionic_unmap_bars(struct ionic *ionic) bars[i].len = 0; } } + ionic->num_bars = 0; } void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num) @@ -215,13 +216,15 @@ out: static void ionic_clear_pci(struct ionic *ionic) { - ionic->idev.dev_info_regs = NULL; - ionic->idev.dev_cmd_regs = NULL; - ionic->idev.intr_status = NULL; - ionic->idev.intr_ctrl = NULL; - - ionic_unmap_bars(ionic); - pci_release_regions(ionic->pdev); + if (ionic->num_bars) { + ionic->idev.dev_info_regs = NULL; + ionic->idev.dev_cmd_regs = NULL; + ionic->idev.intr_status = NULL; + ionic->idev.intr_ctrl = NULL; + + ionic_unmap_bars(ionic); + pci_release_regions(ionic->pdev); + } if (pci_is_enabled(ionic->pdev)) pci_disable_device(ionic->pdev); -- cgit v1.2.3 From 7662fad348ac54120e9e6443cb0bbe4f3b582219 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Feb 2024 14:27:41 -0800 Subject: ionic: check cmd_regs before copying in or out Since we now have potential 
cases of NULL cmd_regs and info_regs during a reset recovery, and left NULL if a reset recovery has failed, we need to check that they exist before we use them. Most of the cases were covered in the original patch where we verify before doing the ioreadb() for health or cmd status. However, we need to protect a few uses of io mem that could be hit in error recovery or asynchronous threads calls as well (e.g. ethtool or devlink handlers). Fixes: 219e183272b4 ("ionic: no fw read when PCI reset failed") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_dev.c | 10 ++++++++++ drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 7 ++++++- drivers/net/ethernet/pensando/ionic/ionic_fw.c | 5 +++++ drivers/net/ethernet/pensando/ionic/ionic_main.c | 3 +++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 1e7c71f7f081..746072b4dbd0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -319,22 +319,32 @@ do_check_time: u8 ionic_dev_cmd_status(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return (u8)PCI_ERROR_RESPONSE; return ioread8(&idev->dev_cmd_regs->comp.comp.status); } bool ionic_dev_cmd_done(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return false; return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE; } void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) { + if (!idev->dev_cmd_regs) + return; memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp)); } void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) { idev->opcode = cmd->cmd.opcode; + + if (!idev->dev_cmd_regs) + return; + memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); iowrite32(0, &idev->dev_cmd_regs->done); iowrite32(1, 
&idev->dev_cmd_regs->doorbell); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index cd3c0b01402e..0ffc9c4904ac 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -90,18 +90,23 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev; unsigned int offset; unsigned int size; regs->version = IONIC_DEV_CMD_REG_VERSION; + idev = &lif->ionic->idev; + if (!idev->dev_info_regs) + return; + offset = 0; size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32); memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size); offset += size; size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32); - memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); + memcpy_fromio(p + offset, idev->dev_cmd_regs->words, size); } static void ionic_get_link_ext_stats(struct net_device *netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c index 5f40324cd243..3c209c1a2337 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_fw.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c @@ -109,6 +109,11 @@ int ionic_firmware_update(struct ionic_lif *lif, const struct firmware *fw, dl = priv_to_devlink(ionic); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); + if (!idev->dev_cmd_regs) { + err = -ENXIO; + goto err_out; + } + buf_sz = sizeof(idev->dev_cmd_regs->data); netdev_dbg(netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 165ab08ad2dd..2f479de329fe 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -416,6 +416,9 @@ static void ionic_dev_cmd_clean(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; + 
if (!idev->dev_cmd_regs) + return; + iowrite32(0, &idev->dev_cmd_regs->doorbell); memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } -- cgit v1.2.3 From 155a1efc9b96a39857714aff49a11ebc93022c8c Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Feb 2024 14:27:42 -0800 Subject: ionic: restore netdev feature bits after reset When rebuilding the lif after an FLR, be sure to restore the current netdev features, not do the usual first time feature init. This prevents losing user changes to things like TSO or vlan tagging states. Fixes: 45b84188a0a4 ("ionic: keep filters across FLR") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index cf2d5ad7b68c..fcb44ceeb6aa 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3559,7 +3559,10 @@ int ionic_lif_init(struct ionic_lif *lif) goto err_out_notifyq_deinit; } - err = ionic_init_nic_features(lif); + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + err = ionic_set_nic_features(lif, lif->netdev->features); + else + err = ionic_init_nic_features(lif); if (err) goto err_out_notifyq_deinit; -- cgit v1.2.3 From 6415c7fe7cf420fa469095a34d9153f991391116 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Feb 2024 12:52:51 +0000 Subject: spi: Drop mismerged fix One patch of a series of three that was sent fixing issues with the ppc4xx driver was targeted at -next, unfortunately it being sandwiched between two others that targeted mainline tripped up my workflow and caused it to get merged along with the others. The ppc4xx driver is only buildable in very limited configurations so none of the CI catches issues with it. 
Fixes: de4af897ddf2 ("spi: ppc4xx: Fix fallout from rename in struct spi_bitbang") Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 942c3117ab3a..82d6264841fc 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -359,22 +359,22 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) /* Setup the state for the bitbang driver */ bbp = &hw->bitbang; - bbp->ctlr = hw->host; + bbp->master = hw->host; bbp->setup_transfer = spi_ppc4xx_setupxfer; bbp->txrx_bufs = spi_ppc4xx_txrx; bbp->use_dma = 0; - bbp->ctlr->setup = spi_ppc4xx_setup; - bbp->ctlr->cleanup = spi_ppc4xx_cleanup; - bbp->ctlr->bits_per_word_mask = SPI_BPW_MASK(8); - bbp->ctlr->use_gpio_descriptors = true; + bbp->master->setup = spi_ppc4xx_setup; + bbp->master->cleanup = spi_ppc4xx_cleanup; + bbp->master->bits_per_word_mask = SPI_BPW_MASK(8); + bbp->master->use_gpio_descriptors = true; /* * The SPI core will count the number of GPIO descriptors to figure * out the number of chip selects available on the platform. */ - bbp->ctlr->num_chipselect = 0; + bbp->master->num_chipselect = 0; /* the spi->mode bits understood by this driver: */ - bbp->ctlr->mode_bits = + bbp->master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST; /* Get the clock for the OPB */ -- cgit v1.2.3 From 1ce7d306ea63f3e379557c79abd88052e0483813 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 23 Feb 2024 15:59:08 -0800 Subject: veth: try harder when allocating queue memory struct veth_rq is pretty large, 832B total without debug options enabled. Since commit under Fixes we try to pre-allocate enough queues for every possible CPU. Miao Wang reports that this may lead to order-5 allocations which will fail in production. Let the allocation fallback to vmalloc() and try harder. 
These are the same flags we pass to netdev queue allocation. Reported-and-tested-by: Miao Wang Fixes: 9d3684c24a52 ("veth: create by default nr_possible_cpus queues") Link: https://lore.kernel.org/all/5F52CAE2-2FB7-4712-95F1-3312FBBFA8DD@gmail.com/ Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240223235908.693010-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/veth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index a786be805709..cd4a6fe458f9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1461,7 +1461,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1477,7 +1478,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) -- cgit v1.2.3 From 4a718d7dbab873bc24034fc865d3a5442632d1fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:42 +0100 Subject: xen-blkfront: set max_discard/secure erase limits to UINT_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xen-blkfront sets the max discard limit to the capacity of the device, which is suboptimal when the capacity changes. Just set it to UINT_MAX, which has the same effect and is simpler. 
Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4cc2884e7484..f78167cd5a63 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -944,20 +944,18 @@ static const struct blk_mq_ops blkfront_mq_ops = { static void blkif_set_queue_limits(struct blkfront_info *info) { struct request_queue *rq = info->rq; - struct gendisk *gd = info->gd; unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - blk_queue_max_discard_sectors(rq, get_capacity(gd)); + blk_queue_max_discard_sectors(rq, UINT_MAX); rq->limits.discard_granularity = info->discard_granularity ?: info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_max_secure_erase_sectors(rq, - get_capacity(gd)); + blk_queue_max_secure_erase_sectors(rq, UINT_MAX); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ -- cgit v1.2.3 From 738be136327a56e5a67e1942a2c318fb91914a3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:43 +0100 Subject: xen-blkfront: rely on the default discard granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The block layer now sets the discard granularity to the physical block size default. Take advantage of that in xen-blkfront and only set the discard granularity if explicitly specified. 
Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index f78167cd5a63..1258f24b2855 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -951,8 +951,8 @@ static void blkif_set_queue_limits(struct blkfront_info *info) if (info->feature_discard) { blk_queue_max_discard_sectors(rq, UINT_MAX); - rq->limits.discard_granularity = info->discard_granularity ?: - info->physical_sector_size; + if (info->discard_granularity) + rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) blk_queue_max_secure_erase_sectors(rq, UINT_MAX); -- cgit v1.2.3 From 4f81b87d91be2a00195f85847d040c2276cac2ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:44 +0100 Subject: xen-blkfront: don't redundantly set max_segments in blkif_recover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blkif_set_queue_limits already sets the max_segments limits, so don't do it a second time. Also remove a comment about a long fixed bug in blk_mq_update_nr_hw_queues.
Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1258f24b2855..7664638a0abb 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2008,14 +2008,10 @@ static int blkif_recover(struct blkfront_info *info) struct request *req, *n; int rc; struct bio *bio; - unsigned int segs; struct blkfront_ring_info *rinfo; blkfront_gather_backend_features(info); - /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ blkif_set_queue_limits(info); - segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); @@ -2035,7 +2031,9 @@ static int blkif_recover(struct blkfront_info *info) list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); - BUG_ON(req->nr_phys_segments > segs); + BUG_ON(req->nr_phys_segments > + (info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)); blk_mq_requeue_request(req, false); } blk_mq_start_stopped_hw_queues(info->rq, true); -- cgit v1.2.3 From ba3f67c1163812b5d7ec33705c31edaa30ce6c51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:45 +0100 Subject: xen-blkfront: atomically update queue limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the initial queue limits to blk_mq_alloc_disk and use the blkif_set_queue_limits API to update the limits on reconnect. 
Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7664638a0abb..fd7c0ff2139c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -941,37 +941,35 @@ static const struct blk_mq_ops blkfront_mq_ops = { .complete = blkif_complete_rq, }; -static void blkif_set_queue_limits(struct blkfront_info *info) +static void blkif_set_queue_limits(const struct blkfront_info *info, + struct queue_limits *lim) { - struct request_queue *rq = info->rq; unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); - if (info->feature_discard) { - blk_queue_max_discard_sectors(rq, UINT_MAX); + lim->max_hw_discard_sectors = UINT_MAX; if (info->discard_granularity) - rq->limits.discard_granularity = info->discard_granularity; - rq->limits.discard_alignment = info->discard_alignment; + lim->discard_granularity = info->discard_granularity; + lim->discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_max_secure_erase_sectors(rq, UINT_MAX); + lim->max_secure_erase_sectors = UINT_MAX; } /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_physical_block_size(rq, info->physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + lim->logical_block_size = info->sector_size; + lim->physical_block_size = info->physical_sector_size; + lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512; /* Each segment in a request is up to an aligned page in size. 
*/ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); + lim->seg_boundary_mask = PAGE_SIZE - 1; + lim->max_segment_size = PAGE_SIZE; /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + lim->max_segments = segments / GRANTS_PER_PSEG; /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); + lim->dma_alignment = 511; } static const char *flush_info(struct blkfront_info *info) @@ -1068,6 +1066,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, u16 sector_size, unsigned int physical_sector_size) { + struct queue_limits lim = {}; struct gendisk *gd; int nr_minors = 1; int err; @@ -1134,11 +1133,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, NULL, info); + blkif_set_queue_limits(info, &lim); + gd = blk_mq_alloc_disk(&info->tag_set, &lim, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; } + blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue); strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); @@ -1160,7 +1161,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->gd = gd; info->sector_size = sector_size; info->physical_sector_size = physical_sector_size; - blkif_set_queue_limits(info); xlvbd_flush(info); @@ -2004,14 +2004,19 @@ static int blkfront_probe(struct xenbus_device *dev, static int blkif_recover(struct blkfront_info *info) { + struct queue_limits lim; unsigned int r_index; struct request *req, *n; int rc; struct bio *bio; struct blkfront_ring_info *rinfo; + lim = queue_limits_start_update(info->rq); blkfront_gather_backend_features(info); - blkif_set_queue_limits(info); + blkif_set_queue_limits(info, &lim); + rc = queue_limits_commit_update(info->rq, &lim); + if (rc) + return rc; for_each_rinfo(info, rinfo, 
r_index) { rc = blkfront_setup_indirect(rinfo); -- cgit v1.2.3 From b7357ec21df979b9f72bac61df195dd30eab3381 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:32 +0100 Subject: irqchip/imgpdc: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/472fc6f6bcd54b73f8af206d079a80cb8744d0ca.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-imgpdc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c index 5831be454673..b42ed68acfa6 100644 --- a/drivers/irqchip/irq-imgpdc.c +++ b/drivers/irqchip/irq-imgpdc.c @@ -461,12 +461,11 @@ err_generic: return ret; } -static int pdc_intc_remove(struct platform_device *pdev) +static void pdc_intc_remove(struct platform_device *pdev) { struct pdc_intc_priv *priv = platform_get_drvdata(pdev); irq_domain_remove(priv->domain); - return 0; } static const struct of_device_id pdc_intc_match[] = { @@ -479,8 +478,8 @@ static struct platform_driver pdc_intc_driver = { .name = "pdc-intc", .of_match_table = pdc_intc_match, }, - .probe = pdc_intc_probe, - .remove = pdc_intc_remove, + .probe = pdc_intc_probe, + .remove_new = pdc_intc_remove, }; static int __init pdc_intc_init(void) -- cgit v1.2.3 From 984e5c7b8d7bac9efd3818e06dcf1f7aab7701b2 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:33 +0100 Subject: irqchip/imx-intmux: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). 
Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/edeee074956dd943d3c67da894a01dc5f0d33bd7.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-imx-intmux.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c index aa041e4dfee0..656eab21285c 100644 --- a/drivers/irqchip/irq-imx-intmux.c +++ b/drivers/irqchip/irq-imx-intmux.c @@ -282,7 +282,7 @@ out: return ret; } -static int imx_intmux_remove(struct platform_device *pdev) +static void imx_intmux_remove(struct platform_device *pdev) { struct intmux_data *data = platform_get_drvdata(pdev); int i; @@ -298,8 +298,6 @@ static int imx_intmux_remove(struct platform_device *pdev) } pm_runtime_disable(&pdev->dev); - - return 0; } #ifdef CONFIG_PM @@ -354,11 +352,11 @@ static const struct of_device_id imx_intmux_id[] = { static struct platform_driver imx_intmux_driver = { .driver = { - .name = "imx-intmux", - .of_match_table = imx_intmux_id, - .pm = &imx_intmux_pm_ops, + .name = "imx-intmux", + .of_match_table = imx_intmux_id, + .pm = &imx_intmux_pm_ops, }, - .probe = imx_intmux_probe, - .remove = imx_intmux_remove, + .probe = imx_intmux_probe, + .remove_new = imx_intmux_remove, }; builtin_platform_driver(imx_intmux_driver); -- cgit v1.2.3 From 5fcf3688e8755cb81fa48fc0fe724cde877491c3 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:34 +0100 Subject: irqchip/imx-irqsteer: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. 
However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/c0e5afe62256860150d25bcf644f2b8d62794c86.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-imx-irqsteer.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c index bd9543314539..20cf7a9e9ece 100644 --- a/drivers/irqchip/irq-imx-irqsteer.c +++ b/drivers/irqchip/irq-imx-irqsteer.c @@ -231,7 +231,7 @@ out: return ret; } -static int imx_irqsteer_remove(struct platform_device *pdev) +static void imx_irqsteer_remove(struct platform_device *pdev) { struct irqsteer_data *irqsteer_data = platform_get_drvdata(pdev); int i; @@ -243,8 +243,6 @@ static int imx_irqsteer_remove(struct platform_device *pdev) irq_domain_remove(irqsteer_data->domain); clk_disable_unprepare(irqsteer_data->ipg_clk); - - return 0; } #ifdef CONFIG_PM @@ -307,11 +305,11 @@ static const struct of_device_id imx_irqsteer_dt_ids[] = { static struct platform_driver imx_irqsteer_driver = { .driver = { - .name = "imx-irqsteer", - .of_match_table = imx_irqsteer_dt_ids, - .pm = &imx_irqsteer_pm_ops, + .name = "imx-irqsteer", + .of_match_table = imx_irqsteer_dt_ids, + .pm = &imx_irqsteer_pm_ops, }, - .probe = imx_irqsteer_probe, - .remove = imx_irqsteer_remove, + .probe = imx_irqsteer_probe, + .remove_new = imx_irqsteer_remove, }; builtin_platform_driver(imx_irqsteer_driver); -- cgit v1.2.3 From 
e58e0b5290bfb3ae022d95794499ae62e38c73b5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:35 +0100 Subject: irqchip/keystone: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/4c852a3359aa06bedcf3a10f3fd8c1e008cc5a3a.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-keystone.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index a36396db4b08..30f1979fa124 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -190,7 +190,7 @@ static int keystone_irq_probe(struct platform_device *pdev) return 0; } -static int keystone_irq_remove(struct platform_device *pdev) +static void keystone_irq_remove(struct platform_device *pdev) { struct keystone_irq_device *kirq = platform_get_drvdata(pdev); int hwirq; @@ -201,7 +201,6 @@ static int keystone_irq_remove(struct platform_device *pdev) irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq)); irq_domain_remove(kirq->irqd); - return 0; } static const struct of_device_id keystone_irq_dt_ids[] = { @@ -212,7 +211,7 
@@ MODULE_DEVICE_TABLE(of, keystone_irq_dt_ids); static struct platform_driver keystone_irq_device_driver = { .probe = keystone_irq_probe, - .remove = keystone_irq_remove, + .remove_new = keystone_irq_remove, .driver = { .name = "keystone_irq", .of_match_table = of_match_ptr(keystone_irq_dt_ids), -- cgit v1.2.3 From f62c5be8a63add3b807230287488fc8a9a3b9c34 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:36 +0100 Subject: irqchip/ls-scfg-msi: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/1e7143ca68ff0715e0f954504e750fc92e8c6d80.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-ls-scfg-msi.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 15cf80b46322..1aef5c4d27c6 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -398,7 +398,7 @@ static int ls_scfg_msi_probe(struct platform_device *pdev) return 0; } -static int ls_scfg_msi_remove(struct platform_device *pdev) +static void ls_scfg_msi_remove(struct platform_device *pdev) { struct ls_scfg_msi *msi_data = platform_get_drvdata(pdev); int i; @@ -410,17 +410,15 @@ static int ls_scfg_msi_remove(struct platform_device *pdev) irq_domain_remove(msi_data->parent); platform_set_drvdata(pdev, NULL); - - return 0; } static struct platform_driver ls_scfg_msi_driver = { .driver = { - .name = "ls-scfg-msi", - .of_match_table = ls_scfg_msi_id, + .name = "ls-scfg-msi", + .of_match_table = ls_scfg_msi_id, }, - .probe = ls_scfg_msi_probe, - .remove = ls_scfg_msi_remove, + .probe = ls_scfg_msi_probe, + .remove_new = ls_scfg_msi_remove, }; module_platform_driver(ls_scfg_msi_driver); -- cgit v1.2.3 From 8d0f3e7bdef44236d33ac4a5f2106602e0d4e1ea Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:37 +0100 Subject: irqchip/madera: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. 
In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/r/64c2f79760c53f29651e7126418c407ff699317d.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-madera.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-madera.c b/drivers/irqchip/irq-madera.c index 3eb1f8cdf674..acceb6e7fa95 100644 --- a/drivers/irqchip/irq-madera.c +++ b/drivers/irqchip/irq-madera.c @@ -222,7 +222,7 @@ static int madera_irq_probe(struct platform_device *pdev) return 0; } -static int madera_irq_remove(struct platform_device *pdev) +static void madera_irq_remove(struct platform_device *pdev) { struct madera *madera = dev_get_drvdata(pdev->dev.parent); @@ -232,13 +232,11 @@ static int madera_irq_remove(struct platform_device *pdev) */ madera->irq_dev = NULL; regmap_del_irq_chip(madera->irq, madera->irq_data); - - return 0; } static struct platform_driver madera_irq_driver = { - .probe = &madera_irq_probe, - .remove = &madera_irq_remove, + .probe = madera_irq_probe, + .remove_new = madera_irq_remove, .driver = { .name = "madera-irq", .pm = &madera_irq_pm_ops, -- cgit v1.2.3 From b7b58085a5b57bbe83194e12c31977f345c87cce Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:38 +0100 Subject: irqchip/mvebu-pic: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. 
However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Acked-by: Gregory CLEMENT Link: https://lore.kernel.org/r/df977ad4c02ff913b01cdd6c348e7fae3e08e651.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-mvebu-pic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c index ef3d3646ccc2..d17d9c0e2880 100644 --- a/drivers/irqchip/irq-mvebu-pic.c +++ b/drivers/irqchip/irq-mvebu-pic.c @@ -167,14 +167,12 @@ static int mvebu_pic_probe(struct platform_device *pdev) return 0; } -static int mvebu_pic_remove(struct platform_device *pdev) +static void mvebu_pic_remove(struct platform_device *pdev) { struct mvebu_pic *pic = platform_get_drvdata(pdev); on_each_cpu(mvebu_pic_disable_percpu_irq, pic, 1); irq_domain_remove(pic->domain); - - return 0; } static const struct of_device_id mvebu_pic_of_match[] = { @@ -184,11 +182,11 @@ static const struct of_device_id mvebu_pic_of_match[] = { MODULE_DEVICE_TABLE(of, mvebu_pic_of_match); static struct platform_driver mvebu_pic_driver = { - .probe = mvebu_pic_probe, - .remove = mvebu_pic_remove, + .probe = mvebu_pic_probe, + .remove_new = mvebu_pic_remove, .driver = { - .name = "mvebu-pic", - .of_match_table = mvebu_pic_of_match, + .name = "mvebu-pic", + .of_match_table = mvebu_pic_of_match, }, }; module_platform_driver(mvebu_pic_driver); -- cgit v1.2.3 From abe9da4056bc6f13ab96c1511d23bd4635bb0bf0 Mon Sep 17 
00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:39 +0100 Subject: irqchip/pruss-intc: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/071057cfdc0bc52c574f74156b410c0337adb69c.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-pruss-intc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c index 0f64ecb9b1f4..060eb000e9d3 100644 --- a/drivers/irqchip/irq-pruss-intc.c +++ b/drivers/irqchip/irq-pruss-intc.c @@ -599,7 +599,7 @@ fail_irq: return ret; } -static int pruss_intc_remove(struct platform_device *pdev) +static void pruss_intc_remove(struct platform_device *pdev) { struct pruss_intc *intc = platform_get_drvdata(pdev); u8 max_system_events = intc->soc_config->num_system_events; @@ -616,8 +616,6 @@ static int pruss_intc_remove(struct platform_device *pdev) irq_dispose_mapping(irq_find_mapping(intc->domain, hwirq)); irq_domain_remove(intc->domain); - - return 0; } static const struct pruss_intc_match_data pruss_intc_data = { @@ -645,12 +643,12 @@ MODULE_DEVICE_TABLE(of, 
pruss_intc_of_match); static struct platform_driver pruss_intc_driver = { .driver = { - .name = "pruss-intc", - .of_match_table = pruss_intc_of_match, - .suppress_bind_attrs = true, + .name = "pruss-intc", + .of_match_table = pruss_intc_of_match, + .suppress_bind_attrs = true, }, - .probe = pruss_intc_probe, - .remove = pruss_intc_remove, + .probe = pruss_intc_probe, + .remove_new = pruss_intc_remove, }; module_platform_driver(pruss_intc_driver); -- cgit v1.2.3 From be5476f6658ac4cc562b4c4ea4c90acd18ee7a29 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:40 +0100 Subject: irqchip/renesas-intc-irqpin: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/6dc03cf63382d24f954c167aaa988f8e31d6b89d.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-renesas-intc-irqpin.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index fa19585f3dee..9ad37237ba95 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -561,14 +561,13 @@ err0: return ret; } -static int intc_irqpin_remove(struct platform_device *pdev) +static void intc_irqpin_remove(struct platform_device *pdev) { struct intc_irqpin_priv *p = platform_get_drvdata(pdev); irq_domain_remove(p->irq_domain); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); - return 0; } static int __maybe_unused intc_irqpin_suspend(struct device *dev) @@ -585,11 +584,11 @@ static SIMPLE_DEV_PM_OPS(intc_irqpin_pm_ops, intc_irqpin_suspend, NULL); static struct platform_driver intc_irqpin_device_driver = { .probe = intc_irqpin_probe, - .remove = intc_irqpin_remove, + .remove_new = intc_irqpin_remove, .driver = { - .name = "renesas_intc_irqpin", - .of_match_table = intc_irqpin_dt_ids, - .pm = &intc_irqpin_pm_ops, + .name = "renesas_intc_irqpin", + .of_match_table = intc_irqpin_dt_ids, + .pm = &intc_irqpin_pm_ops, } }; -- cgit v1.2.3 From 127806dc0b2aacd4355a977a6d8ba5cc6d64f55e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:41 +0100 Subject: irqchip/renesas-irqc: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. 
However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/2d367ab738ed2e4cf58cffc10d64b0cbe8a1322c.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-renesas-irqc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 49b446b396f9..76026e0b8e20 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -218,14 +218,13 @@ err_runtime_pm_disable: return ret; } -static int irqc_remove(struct platform_device *pdev) +static void irqc_remove(struct platform_device *pdev) { struct irqc_priv *p = platform_get_drvdata(pdev); irq_domain_remove(p->irq_domain); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); - return 0; } static int __maybe_unused irqc_suspend(struct device *dev) @@ -248,11 +247,11 @@ MODULE_DEVICE_TABLE(of, irqc_dt_ids); static struct platform_driver irqc_device_driver = { .probe = irqc_probe, - .remove = irqc_remove, + .remove_new = irqc_remove, .driver = { - .name = "renesas_irqc", + .name = "renesas_irqc", .of_match_table = irqc_dt_ids, - .pm = &irqc_pm_ops, + .pm = &irqc_pm_ops, } }; -- cgit v1.2.3 From d1c762d93a3bcb42ba3a6b8b09324c7863feef33 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:42 +0100 Subject: irqchip/renesas-rza1: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/1a80e31525d0b02063d2ff1baaaa5e87418f54b6.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-renesas-rza1.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c index e4c99c2e0373..f05afe82db4d 100644 --- a/drivers/irqchip/irq-renesas-rza1.c +++ b/drivers/irqchip/irq-renesas-rza1.c @@ -244,12 +244,11 @@ out_put_node: return ret; } -static int rza1_irqc_remove(struct platform_device *pdev) +static void rza1_irqc_remove(struct platform_device *pdev) { struct rza1_irqc_priv *priv = platform_get_drvdata(pdev); irq_domain_remove(priv->irq_domain); - return 0; } static const struct of_device_id rza1_irqc_dt_ids[] = { @@ -260,9 +259,9 @@ MODULE_DEVICE_TABLE(of, rza1_irqc_dt_ids); static struct platform_driver rza1_irqc_device_driver = { .probe = rza1_irqc_probe, - .remove = rza1_irqc_remove, + .remove_new = rza1_irqc_remove, .driver = { - .name = "renesas_rza1_irqc", + .name = "renesas_rza1_irqc", .of_match_table = rza1_irqc_dt_ids, } }; -- cgit v1.2.3 From 935603e8199991ac1f72c47a41a558f43f1a6004 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: 
Fri, 22 Dec 2023 23:50:43 +0100 Subject: irqchip/stm32-exti: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Reviewed-by: Antonio Borneo Link: https://lore.kernel.org/r/ac551b89025bafadce05102b94596f8cd3564a32.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-stm32-exti.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 971240e2e31b..26a5193d0ae4 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -898,10 +898,9 @@ static void stm32_exti_remove_irq(void *data) irq_domain_remove(domain); } -static int stm32_exti_remove(struct platform_device *pdev) +static void stm32_exti_remove(struct platform_device *pdev) { stm32_exti_h_syscore_deinit(); - return 0; } static int stm32_exti_probe(struct platform_device *pdev) @@ -991,10 +990,10 @@ MODULE_DEVICE_TABLE(of, stm32_exti_ids); static struct platform_driver stm32_exti_driver = { .probe = stm32_exti_probe, - .remove = stm32_exti_remove, + .remove_new = stm32_exti_remove, .driver = { - .name = "stm32_exti", - .of_match_table = stm32_exti_ids, + 
.name = "stm32_exti", + .of_match_table = stm32_exti_ids, }, }; -- cgit v1.2.3 From f7f56d59a3923e95bad2c49615a4d7313ed78314 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Dec 2023 23:50:44 +0100 Subject: irqchip/ts4800: Convert to platform_driver::remove_new() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/0bdce86b50e5aa50cffbc4add332cbfbad87521e.1703284359.git.u.kleine-koenig@pengutronix.de --- drivers/irqchip/irq-ts4800.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c index b2d61d4f6fe6..57f610dab6b8 100644 --- a/drivers/irqchip/irq-ts4800.c +++ b/drivers/irqchip/irq-ts4800.c @@ -139,13 +139,11 @@ static int ts4800_ic_probe(struct platform_device *pdev) return 0; } -static int ts4800_ic_remove(struct platform_device *pdev) +static void ts4800_ic_remove(struct platform_device *pdev) { struct ts4800_irq_data *data = platform_get_drvdata(pdev); irq_domain_remove(data->domain); - - return 0; } static const struct of_device_id ts4800_ic_of_match[] = { @@ -155,11 +153,11 @@ static const struct of_device_id ts4800_ic_of_match[] = { MODULE_DEVICE_TABLE(of, ts4800_ic_of_match); static struct platform_driver ts4800_ic_driver = { - .probe = ts4800_ic_probe, - .remove = ts4800_ic_remove, + .probe = ts4800_ic_probe, + .remove_new = ts4800_ic_remove, .driver = { - .name = "ts4800-irqc", - .of_match_table = ts4800_ic_of_match, + .name = "ts4800-irqc", + .of_match_table = ts4800_ic_of_match, }, }; module_platform_driver(ts4800_ic_driver); -- cgit v1.2.3 From 0f8ca019544a252d1afb468ce840c6dcbac73af4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 20 Feb 2024 09:14:25 +0530 Subject: drm/amd/display: Prevent potential buffer overflow in map_hw_resources Adds a check in the map_hw_resources function to prevent a potential buffer overflow. The function was accessing arrays using an index that could potentially be greater than the size of the arrays, leading to a buffer overflow. Adds a check to ensure that the index is within the bounds of the arrays. 
If the index is out of bounds, an error message is printed and the loop breaks early; execution then continues, ignoring the extra data, which prevents the buffer overflow. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:79 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id' 6 <= 7 drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:81 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id' 6 <= 7 Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Cc: Rodrigo Siqueira Cc: Roman Li Cc: Qingqing Zhuo Cc: Aurabindo Pillai Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Suggested-by: Roman Li Reviewed-by: Roman Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 26307e599614..2a58a7687bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2, in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; } for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; -- cgit
v1.2.3 From 7968e9748fbbd7ae49770d9f8a8231d8bce2aebb Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Thu, 22 Feb 2024 17:08:42 +0800 Subject: drm/amdgpu/pm: Fix the power1_min_cap value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unreasonable to use 0 as the power1_min_cap when OD is disabled. So, use the same lower limit as the value used when OD is enabled. Fixes: 1958946858a6 ("drm/amd/pm: Support for getting power1_cap_min value") Signed-off-by: Ma Jun Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 9 ++++----- 5 files changed, 20 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4cd43bbec910..bcad42534da4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, 
od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 8d1d29ffb0f1..ed189a3878eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (smu->od_enabled && - navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { + navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 21fc033528fa..e2ad2b972ab0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = 
le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9954ffc02c5..9b80f18ea6c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2369,13 +2369,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 0ffdb58af74e..3dc7b60cb075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2333,13 +2333,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 
100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); -- cgit v1.2.3 From c671ec01311b4744b377f98b0b4c6d033fe569b3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 22 Feb 2024 20:56:59 +0800 Subject: drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series GPU resets can now be performed successfully on the Raven series, and a GPU reset is required for the S3 suspend abort case. So enable GPU reset for S3 abort cases on the Raven series. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c64c01e2944a..1c614451dead 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully.
So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1298,24 +1321,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; -- cgit v1.2.3 From 955558030954b9637b41c97b730f9b38c92ac488 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Aug 2023 15:06:00 -0400 Subject: Revert "drm/amd/pm: resolve reboot exception for si oland" This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index df4f20293c16..eb4da3666e05 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? 
+ si_dpm_powergate_uvd(adev, true); +#endif return 0; } -- cgit v1.2.3 From 65730fe8f4fb039683d76fa8ea7e8d18a53c6cc6 Mon Sep 17 00:00:00 2001 From: Vadim Shakirov Date: Tue, 27 Feb 2024 20:00:01 +0300 Subject: drivers: perf: added capabilities for legacy PMU Added the PERF_PMU_CAP_NO_INTERRUPT flag because the legacy pmu driver does not provide sampling capabilities Added the PERF_PMU_CAP_NO_EXCLUDE flag because the legacy pmu driver does not provide the ability to disable counter incrementation in different privilege modes Suggested-by: Atish Patra Signed-off-by: Vadim Shakirov Reviewed-by: Atish Patra Fixes: 9b3e150e310e ("RISC-V: Add a simple platform driver for RISC-V legacy perf") Link: https://lore.kernel.org/r/20240227170002.188671-2-vadim.shakirov@syntacore.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_legacy.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index 79fdd667922e..a85fc9a15f03 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -117,6 +117,8 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->event_mapped = pmu_legacy_event_mapped; pmu->event_unmapped = pmu_legacy_event_unmapped; pmu->csr_index = pmu_legacy_csr_index; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); } -- cgit v1.2.3 From 682dc133f83e0194796e6ea72eb642df1c03dfbe Mon Sep 17 00:00:00 2001 From: Vadim Shakirov Date: Tue, 27 Feb 2024 20:00:02 +0300 Subject: drivers: perf: ctr_get_width function for legacy is not defined With parameters CONFIG_RISCV_PMU_LEGACY=y and CONFIG_RISCV_PMU_SBI=n linux kernel crashes when you try perf record: $ perf record ls [ 46.749286] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 46.750199] Oops [#1] [ 46.750342] Modules linked in: [ 46.750608] CPU: 0 PID: 107 
Comm: perf-exec Not tainted 6.6.0 #2 [ 46.750906] Hardware name: riscv-virtio,qemu (DT) [ 46.751184] epc : 0x0 [ 46.751430] ra : arch_perf_update_userpage+0x54/0x13e [ 46.751680] epc : 0000000000000000 ra : ffffffff8072ee52 sp : ff2000000022b8f0 [ 46.751958] gp : ffffffff81505988 tp : ff6000000290d400 t0 : ff2000000022b9c0 [ 46.752229] t1 : 0000000000000001 t2 : 0000000000000003 s0 : ff2000000022b930 [ 46.752451] s1 : ff600000028fb000 a0 : 0000000000000000 a1 : ff600000028fb000 [ 46.752673] a2 : 0000000ae2751268 a3 : 00000000004fb708 a4 : 0000000000000004 [ 46.752895] a5 : 0000000000000000 a6 : 000000000017ffe3 a7 : 00000000000000d2 [ 46.753117] s2 : ff600000028fb000 s3 : 0000000ae2751268 s4 : 0000000000000000 [ 46.753338] s5 : ffffffff8153e290 s6 : ff600000863b9000 s7 : ff60000002961078 [ 46.753562] s8 : ff60000002961048 s9 : ff60000002961058 s10: 0000000000000001 [ 46.753783] s11: 0000000000000018 t3 : ffffffffffffffff t4 : ffffffffffffffff [ 46.754005] t5 : ff6000000292270c t6 : ff2000000022bb30 [ 46.754179] status: 0000000200000100 badaddr: 0000000000000000 cause: 000000000000000c [ 46.754653] Code: Unable to access instruction at 0xffffffffffffffec. [ 46.754939] ---[ end trace 0000000000000000 ]--- [ 46.755131] note: perf-exec[107] exited with irqs disabled [ 46.755546] note: perf-exec[107] exited with preempt_count 4 This happens because in the legacy case the ctr_get_width function was not defined, but it is used in arch_perf_update_userpage. 
Also remove extra check in riscv_pmu_ctr_get_width_mask Signed-off-by: Vadim Shakirov Reviewed-by: Alexandre Ghiti Reviewed-by: Atish Patra Fixes: cc4c07c89aad ("drivers: perf: Implement perf event mmap support in the SBI backend") Link: https://lore.kernel.org/r/20240227170002.188671-3-vadim.shakirov@syntacore.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu.c | 18 +++++------------- drivers/perf/riscv_pmu_legacy.c | 8 +++++++- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 0dda70e1ef90..c78a6fd6c57f 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -150,19 +150,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - if (!rvpmu->ctr_get_width) - /** - * If the pmu driver doesn't support counter width, set it to default - * maximum allowed by the specification. 
- */ - cwidth = 63; - else { - if (hwc->idx == -1) - /* Handle init case where idx is not initialized yet */ - cwidth = rvpmu->ctr_get_width(0); - else - cwidth = rvpmu->ctr_get_width(hwc->idx); - } + if (hwc->idx == -1) + /* Handle init case where idx is not initialized yet */ + cwidth = rvpmu->ctr_get_width(0); + else + cwidth = rvpmu->ctr_get_width(hwc->idx); return GENMASK_ULL(cwidth, 0); } diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index a85fc9a15f03..fa0bccf4edf2 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config) return pmu_legacy_ctr_get_idx(event); } +/* cycle & instret are always 64 bit, one bit less according to SBI spec */ +static int pmu_legacy_ctr_get_width(int idx) +{ + return 63; +} + static u64 pmu_legacy_read_ctr(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -111,7 +117,7 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->ctr_stop = NULL; pmu->event_map = pmu_legacy_event_map; pmu->ctr_get_idx = pmu_legacy_ctr_get_idx; - pmu->ctr_get_width = NULL; + pmu->ctr_get_width = pmu_legacy_ctr_get_width; pmu->ctr_clear_idx = NULL; pmu->ctr_read = pmu_legacy_read_ctr; pmu->event_mapped = pmu_legacy_event_mapped; -- cgit v1.2.3 From 678c607ecf8a9b1b2ea09c367877164ba66cb11f Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 26 Feb 2024 09:37:37 +0530 Subject: irqchip/riscv-intc: Fix low-level interrupt handler setup for AIA Use riscv_intc_aia_irq() as the low-level interrupt handler instead of the existing riscv_intc_irq() default handler to make demultiplexing work correctly. Also print "using AIA" in the INTC boot banner when AIA is available. 
Fixes: 3c46fc5b5507 ("irqchip/riscv-intc: Add support for RISC-V AIA") Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240226040746.1396416-2-apatel@ventanamicro.com --- drivers/irqchip/irq-riscv-intc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index cccb65339982..f87aeab460eb 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -183,7 +183,10 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_ch return -ENXIO; } - rc = set_handle_irq(&riscv_intc_irq); + if (riscv_isa_extension_available(NULL, SxAIA)) + rc = set_handle_irq(&riscv_intc_aia_irq); + else + rc = set_handle_irq(&riscv_intc_irq); if (rc) { pr_err("failed to set irq handler\n"); return rc; @@ -191,8 +194,9 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_ch riscv_set_intc_hwnode_fn(riscv_intc_hwnode); - pr_info("%d local interrupts mapped\n", - riscv_isa_extension_available(NULL, SxAIA) ? 64 : riscv_intc_nr_irqs); + pr_info("%d local interrupts mapped%s\n", + riscv_isa_extension_available(NULL, SxAIA) ? 64 : riscv_intc_nr_irqs, + riscv_isa_extension_available(NULL, SxAIA) ? " using AIA" : ""); if (riscv_intc_custom_nr_irqs) pr_info("%d custom local interrupts mapped\n", riscv_intc_custom_nr_irqs); -- cgit v1.2.3 From dfd2bf436709b2bccb78c2dda550dde93700efa7 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 12 Jan 2024 15:10:17 +0800 Subject: md/raid5: fix atomicity violation in raid5_cache_count In raid5_cache_count(): if (conf->max_nr_stripes < conf->min_nr_stripes) return 0; return conf->max_nr_stripes - conf->min_nr_stripes; The current check is ineffective, as the values could change immediately after being checked. In raid5_set_cache_size(): ... conf->min_nr_stripes = size; ... 
while (size > conf->max_nr_stripes) conf->min_nr_stripes = conf->max_nr_stripes; ... Due to intermediate value updates in raid5_set_cache_size(), concurrent execution of raid5_cache_count() and raid5_set_cache_size() may lead to inconsistent reads of conf->max_nr_stripes and conf->min_nr_stripes. The current checks are ineffective as values could change immediately after being checked, raising the risk of conf->min_nr_stripes exceeding conf->max_nr_stripes and potentially causing an integer overflow. This possible bug is found by an experimental static analysis tool developed by our team. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 6.2. To resolve this issue, it is suggested to introduce local variables 'min_stripes' and 'max_stripes' in raid5_cache_count() to ensure the values remain stable throughout the check. Adding locks in raid5_cache_count() fails to resolve atomicity violations, as raid5_set_cache_size() may hold intermediate values of conf->min_nr_stripes while unlocked. With this patch applied, our tool no longer reports the bug, with the kernel configuration allyesconfig for x86_64. Due to the lack of associated hardware, we cannot test the patch in runtime testing, and just verify it according to the code logic. 
Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240112071017.16313-1-2045gemini@gmail.com Signed-off-by: Song Liu --- drivers/md/raid5.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 14f2cf75abbd..7ec445f49f1c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2412,7 +2412,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) atomic_inc(&conf->active_stripes); raid5_release_stripe(sh); - conf->max_nr_stripes++; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); return 1; } @@ -2707,7 +2707,7 @@ static int drop_one_stripe(struct r5conf *conf) shrink_buffers(sh); free_stripe(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); return 1; } @@ -6820,7 +6820,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) if (size <= 16 || size > 32768) return -EINVAL; - conf->min_nr_stripes = size; + WRITE_ONCE(conf->min_nr_stripes, size); mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) @@ -6832,7 +6832,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) { - conf->min_nr_stripes = conf->max_nr_stripes; + WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); result = -ENOMEM; break; } @@ -7388,11 +7388,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = shrink->private_data; + int max_stripes = READ_ONCE(conf->max_nr_stripes); + int min_stripes = READ_ONCE(conf->min_nr_stripes); - if (conf->max_nr_stripes < conf->min_nr_stripes) + if 
(max_stripes < min_stripes) /* unlikely, but not impossible */ return 0; - return conf->max_nr_stripes - conf->min_nr_stripes; + return max_stripes - min_stripes; } static struct r5conf *setup_conf(struct mddev *mddev) -- cgit v1.2.3 From 8a904a3caa88118744062e872ae90f37748a8fd8 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:18 +0800 Subject: rtc: test: Fix invalid format specifier. 'days' is a s64 (from div_s64), and so should use a %lld specifier. This was found by extending KUnit's assertion macros to use gcc's __printf attribute. Fixes: 1d1bb12a8b18 ("rtc: Improve performance of rtc_time64_to_tm(). Add tests.") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Acked-by: Alexandre Belloni Signed-off-by: Shuah Khan --- drivers/rtc/lib_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c index d5caf36c56cd..225c859d6da5 100644 --- a/drivers/rtc/lib_test.c +++ b/drivers/rtc/lib_test.c @@ -54,7 +54,7 @@ static void rtc_time64_to_tm_test_date_range(struct kunit *test) days = div_s64(secs, 86400); - #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ + #define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \ year, month, mday, yday, days KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); -- cgit v1.2.3 From 689a930b93c5c20294df5da0407df361c5412eac Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Feb 2024 17:27:21 +0800 Subject: drm/xe/tests: Fix printf format specifiers in xe_migrate test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KUNIT_FAIL() is used to fail the xe_migrate test when an error occurs. However, there's a mismatch in the format specifier: '%li' is used to log 'err', which is an 'int'. Use '%i' instead of '%li', and for the case where we're printing an error pointer, just use '%pe', instead of extracting the error code manually with PTR_ERR(). 
(This also results in a nicer output when the error code is known.) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: David Gow Tested-by: Guenter Roeck Reviewed-by: Lucas De Marchi Acked-by: Thomas Hellström Signed-off-by: Shuah Khan --- drivers/gpu/drm/xe/tests/xe_migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index a6523df0f1d3..c347e2c29f81 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -114,21 +114,21 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, region | XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(remote)) { - KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n", - str, PTR_ERR(remote)); + KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", + str, remote); return; } err = xe_bo_validate(remote, NULL, false); if (err) { - KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", + KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", str, err); goto out_unlock; } err = xe_bo_vmap(remote); if (err) { - KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %i\n", str, err); goto out_unlock; } -- cgit v1.2.3 From c68b2c9eba38ec3f60f4894b189090febf4d8d22 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 25 Feb 2024 00:20:06 +0100 Subject: net: usb: dm9601: fix wrong return value in dm9601_mdio_read The MII code does not check the return value of mdio_read (among others), and therefore no error code should be sent. A previous fix to the use of an uninitialized variable propagates negative error codes, that might lead to wrong operations by the MII library. An example of such issues is the use of mii_nway_restart by the dm9601 driver. 
The mii_nway_restart function does not check the value returned by mdio_read, which in this case might be a negative number which could contain the exact bit the function checks (BMCR_ANENABLE = 0x1000). Return zero in case of error, as it is common practice in users of mdio_read to avoid wrong uses of the return value. Fixes: 8f8abb863fa5 ("net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read") Signed-off-by: Javier Carrasco Reviewed-by: Simon Horman Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/20240225-dm9601_ret_err-v1-1-02c1d959ea59@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/dm9601.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972d..8b6d6a1b3c2e 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, -- cgit v1.2.3 From f72a1994698ebe1a9eeadba7bfe2aa01afcde0a4 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Sun, 25 Feb 2024 12:38:37 +0100 Subject: net: stmmac: Complete meta data only when enabled Currently using plain XDP/ZC sockets on stmmac results in a kernel crash: |[ 255.822584] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 |[...] |[ 255.822764] Call trace: |[ 255.822766] stmmac_tx_clean.constprop.0+0x848/0xc38 The program counter indicates xsk_tx_metadata_complete(). It works on compl->tx_timestamp, which is not set by xsk_tx_metadata_to_compl() due to missing meta data. Therefore, call xsk_tx_metadata_complete() only when meta data is actually used. Tested on imx93 without XDP, with XDP and with XDP/ZC. 
Fixes: 1347b419318d ("net: stmmac: Add Tx HWTS support to XDP ZC") Suggested-by: Serge Semin Tested-by: Serge Semin Link: https://lore.kernel.org/netdev/87r0h7wg8u.fsf@kurt.kurt.home/ Acked-by: Stanislav Fomichev Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20240222-stmmac_xdp-v2-1-4beee3a037e4@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e80d77bd9f1f..8b77c0952071 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2672,7 +2672,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, } if (skb) { stmmac_get_tx_hwtstamp(priv, p, skb); - } else { + } else if (tx_q->xsk_pool && + xp_tx_metadata_enabled(tx_q->xsk_pool)) { struct stmmac_xsk_tx_complete tx_compl = { .priv = priv, .desc = p, -- cgit v1.2.3 From e3d5d70cb483df8296dd44e9ae3b6355ef86494c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 26 Feb 2024 12:08:20 +0100 Subject: net: lan78xx: fix "softirq work is pending" error Disable BH around the call to napi_schedule() to avoid following error: NOHZ tick-stop error: local softirq work is pending, handler #08!!! 
Fixes: ec4c7e12396b ("lan78xx: Introduce NAPI polling support") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240226110820.2113584-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index e06193e831a5..ba6c8ac2a736 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; -- cgit v1.2.3 From f41900e4a6ef019d64a70394b0e0c3bd048d4ec8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:52 +0000 Subject: drm/buddy: fix range bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a corner case here where start/end is after/before the block range we are currently checking. If so we need to be sure that splitting the block will eventually give us the block size we need. To do that we should adjust the block range to account for the start/end, and only continue with the split if the size/alignment will fit the requested size. Not doing so can result in leaving split blocks unmerged when it eventually fails. 
Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: # v5.18+ Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-4-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index c4222b886db7..f3a6ac908f81 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* -- cgit v1.2.3 From 2986314aa811c8a23aeb292edd30315495d54966 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:53 +0000 Subject: drm/buddy: check range allocation matches alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Likely not a big deal for real users, but for consistency we should respect the min_page_size here. Main issue is that bias allocations turn into normal range allocation if the range and size match exactly, and in the next patch we want to add some unit tests for this part of the api. 
Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-5-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f3a6ac908f81..5ebdd6f8f36e 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return -EINVAL; /* Actual range allocation */ - if (start + size == end) + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } original_size = size; original_min_size = min_block_size; -- cgit v1.2.3 From c70703320e557ff30847915e6a7631a9abdda16b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:54 +0000 Subject: drm/tests/drm_buddy: add alloc_range_bias test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION. v2: - Be consistent with u32 here. 
Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-6-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 218 +++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index edacc1adb28f..1008d5b9d61e 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -14,11 +14,216 @@ #include "../lib/drm_random.h" +static unsigned int random_seed; + static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. 
+ */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + 
drm_buddy_free_list(&mm, &tmp); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. 
+ */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_contiguous(struct kunit *test) { u32 mm_size, ps = SZ_4K, i, n_pages, total; @@ -363,17 +568,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with 
random_seed=0x%x\n", + random_seed); + + return 0; +} + static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; -- cgit v1.2.3 From 183420038444547c149a0fc5f58e792c2752860c Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Tue, 27 Feb 2024 00:53:57 +0300 Subject: crypto: sun8i-ce - Fix use after free in unprepare sun8i_ce_cipher_unprepare should be called before crypto_finalize_skcipher_request, because client callbacks may immediately free memory, that isn't needed anymore. But it will be used by unprepare after free. Before removing prepare/unprepare callbacks it was handled by crypto engine in crypto_finalize_request. Usually that results in a pointer dereference problem during a crypto selftest. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000004716d000 [0000000000000030] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP This problem is detected by KASAN as well. 
================================================================== BUG: KASAN: slab-use-after-free in sun8i_ce_cipher_do_one+0x6e8/0xf80 [sun8i_ce] Read of size 8 at addr ffff00000dcdc040 by task 1c15000.crypto-/373 Hardware name: Pine64 PinePhone (1.2) (DT) Call trace: dump_backtrace+0x9c/0x128 show_stack+0x20/0x38 dump_stack_lvl+0x48/0x60 print_report+0xf8/0x5d8 kasan_report+0x90/0xd0 __asan_load8+0x9c/0xc0 sun8i_ce_cipher_do_one+0x6e8/0xf80 [sun8i_ce] crypto_pump_work+0x354/0x620 [crypto_engine] kthread_worker_fn+0x244/0x498 kthread+0x168/0x178 ret_from_fork+0x10/0x20 Allocated by task 379: kasan_save_stack+0x3c/0x68 kasan_set_track+0x2c/0x40 kasan_save_alloc_info+0x24/0x38 __kasan_kmalloc+0xd4/0xd8 __kmalloc+0x74/0x1d0 alg_test_skcipher+0x90/0x1f0 alg_test+0x24c/0x830 cryptomgr_test+0x38/0x60 kthread+0x168/0x178 ret_from_fork+0x10/0x20 Freed by task 379: kasan_save_stack+0x3c/0x68 kasan_set_track+0x2c/0x40 kasan_save_free_info+0x38/0x60 __kasan_slab_free+0x100/0x170 slab_free_freelist_hook+0xd4/0x1e8 __kmem_cache_free+0x15c/0x290 kfree+0x74/0x100 kfree_sensitive+0x80/0xb0 alg_test_skcipher+0x12c/0x1f0 alg_test+0x24c/0x830 cryptomgr_test+0x38/0x60 kthread+0x168/0x178 ret_from_fork+0x10/0x20 The buggy address belongs to the object at ffff00000dcdc000 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 64 bytes inside of freed 256-byte region [ffff00000dcdc000, ffff00000dcdc100) Signed-off-by: Andrey Skvortsov Fixes: 4136212ab18e ("crypto: sun8i-ce - Remove prepare/unprepare request") Cc: Signed-off-by: Herbert Xu --- .../crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 1262a7773ef3..de50c00ba218 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ 
-299,22 +299,6 @@ theend: return err; } -static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) -{ - struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); - struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); - struct sun8i_ce_dev *ce = op->ce; - struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); - int flow, err; - - flow = rctx->flow; - err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); - local_bh_disable(); - crypto_finalize_skcipher_request(engine, breq, err); - local_bh_enable(); -} - static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_req) { @@ -360,6 +344,23 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE); } +static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ce_dev *ce = op->ce; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); + int flow, err; + + flow = rctx->flow; + err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); + sun8i_ce_cipher_unprepare(engine, areq); + local_bh_disable(); + crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); +} + int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) { int err = sun8i_ce_cipher_prepare(engine, areq); @@ -368,7 +369,6 @@ int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) return err; sun8i_ce_cipher_run(engine, areq); - sun8i_ce_cipher_unprepare(engine, areq); return 0; } -- cgit v1.2.3 From 943d4bd67950685901addfa7b07aa3408ce17e7f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 25 Feb 2024 23:48:20 -0800 
Subject: net: ethernet: adi: move PHYLIB from vendor to driver symbol In a previous patch I added "select PHYLIB" at the wrong place for the ADIN1110 driver symbol, so move it to its correct place under the ADIN1110 kconfig symbol. Fixes: a9f80df4f514 ("net: ethernet: adi: requires PHYLIB support") Signed-off-by: Randy Dunlap Reported-by: Michal Kubecek Closes: https://lore.kernel.org/lkml/77012b38-4b49-47f4-9a88-d773d52909ad@infradead.org/T/#m8ba397484738711edc0ad607b2c63ca02244e3c3 Cc: Lennart Franzen Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Cc: Nuno Sa Tested-by: Michal Kubecek Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index c91b4dcef4ec..760a9a60bc15 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -7,7 +7,6 @@ config NET_VENDOR_ADI bool "Analog Devices devices" default y depends on SPI - select PHYLIB help If you have a network (Ethernet) card belonging to this class, say Y. @@ -22,6 +21,7 @@ config ADIN1110 tristate "Analog Devices ADIN1110 MAC-PHY" depends on SPI && NET_SWITCHDEV select CRC8 + select PHYLIB help Say yes here to build support for Analog Devices ADIN1110 Low Power 10BASE-T1L Ethernet MAC-PHY. -- cgit v1.2.3 From 8af411bbba1f457c33734795f024d0ef26d0963f Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 26 Feb 2024 17:42:32 +0100 Subject: stmmac: Clear variable when destroying workqueue Currently when suspending driver and stopping workqueue it is checked whether workqueue is not NULL and if so, it is destroyed. Function destroy_workqueue() does drain queue and does clear variable, but it does not set workqueue variable to NULL. This can cause kernel/module panic if code attempts to clear workqueue that was not initialized. 
This scenario is possible when resuming suspended driver in stmmac_resume(), because there is no handling for failed stmmac_hw_setup(), which can fail and return if DMA engine has failed to initialize, and workqueue is initialized after DMA engine. Should DMA engine fail to initialize, resume will proceed normally, but interface won't work and TX queue will eventually timeout, causing 'Reset adapter' error. This then does destroy workqueue during reset process. And since workqueue is initialized after DMA engine and can be skipped, it will cause kernel/module panic. To secure against this possible crash, set workqueue variable to NULL when destroying workqueue. Log/backtrace from crash goes as follows: [88.031977]------------[ cut here ]------------ [88.031985]NETDEV WATCHDOG: eth0 (sxgmac): transmit queue 1 timed out [88.032017]WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:477 dev_watchdog+0x390/0x398 [88.032251]---[ end trace e70de432e4d5c2c0 ]--- [88.032282]sxgmac 16d88000.ethernet eth0: Reset adapter. [88.036359]------------[ cut here ]------------ [88.036519]Call trace: [88.036523] flush_workqueue+0x3e4/0x430 [88.036528] drain_workqueue+0xc4/0x160 [88.036533] destroy_workqueue+0x40/0x270 [88.036537] stmmac_fpe_stop_wq+0x4c/0x70 [88.036541] stmmac_release+0x278/0x280 [88.036546] __dev_close_many+0xcc/0x158 [88.036551] dev_close_many+0xbc/0x190 [88.036555] dev_close.part.0+0x70/0xc0 [88.036560] dev_close+0x24/0x30 [88.036564] stmmac_service_task+0x110/0x140 [88.036569] process_one_work+0x1d8/0x4a0 [88.036573] worker_thread+0x54/0x408 [88.036578] kthread+0x164/0x170 [88.036583] ret_from_fork+0x10/0x20 [88.036588]---[ end trace e70de432e4d5c2c1 ]--- [88.036597]Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 Fixes: 5a5586112b929 ("net: stmmac: support FPE link partner hand-shaking procedure") Signed-off-by: Jakub Raczynski Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8b77c0952071..7c6aef033a45 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4006,8 +4006,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } -- cgit v1.2.3 From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 22:09:30 +0200 Subject: mmc: sdhci-xenon: fix PHY init clock stability Each time SD/mmc phy is initialized, at times, in some of the attempts, phy fails to complete its initialization, which results in a timeout error. Per the HW spec, it is a pre-requisite to ensure a stable SD clock before a phy initialization is attempted. 
Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon-phy.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de65..c3096230a969 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" @@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); -- cgit v1.2.3 From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 21:17:14 +0200 Subject: mmc: sdhci-xenon: add timeout for PHY init complete AC5X spec says PHY init complete bit must be polled until zero. We see cases in which timeout can take longer than the standard calculation on AC5X, which is expected following the spec comment above. 
According to the spec, we must wait as long as it takes for that bit to toggle on AC5X. Cap that with 100 delay loops so we won't get stuck forever. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon-phy.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index c3096230a969..cc9d28b75eb9 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -110,6 +110,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. 
+ * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 -- cgit v1.2.3 From 664bad6af3cbe01d6804b7264bee674b3e7dae7e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 28 Feb 2024 00:08:08 +0200 Subject: Revert "drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes" This reverts commit e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes"). The commit changed the way how the MSM DP driver communicates HPD-related events to the userspace. The mentioned commit made some of the HPD events being reported earlier. This way userspace starts poking around. It interacts in a bad way with the dp_bridge_detect and the driver's state machine, ending up either with the very long delays during hotplug detection or even inability of the DP driver to report the display as connected. A proper fix will involve redesigning of the HPD handling in the MSM DP driver. It is underway, but it will be intrusive and can not be thought about as a simple fix for the issue. Thus, revert the offending commit. 
Fixes: e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes") Link: https://gitlab.freedesktop.org/drm/msm/-/issues/50 Reported-by: Johan Hovold Link: https://lore.kernel.org/r/Zd3YPGmrprxv-N-O@hovoldconsulting.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Paloma Arellano Tested-by: Johan Hovold Tested-by: Neil Armstrong # on SM8650-HDK Patchwork: https://patchwork.freedesktop.org/patch/580313/ Link: https://lore.kernel.org/r/20240227220808.50146-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/dp/dp_display.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d37d599aec27..4c72124ffb5d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; +static void dp_display_send_hpd_event(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + struct drm_connector *connector; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + connector = dp->dp_display.connector; + drm_helper_hpd_irq_event(connector->dev); +} + static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - struct drm_bridge *bridge = dp->dp_display.bridge; + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { + drm_dbg_dp(dp->drm_dev, "HPD already %s\n", + (hpd ? 
"on" : "off")); + return 0; + } /* reset video pattern flag on disconnect */ if (!hpd) { @@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); + dp_display_send_hpd_event(&dp->dp_display); return 0; } -- cgit v1.2.3 From c17d2a7b216e168c3ba62d93482179c01b369ac7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 27 Dec 2023 11:10:03 +0100 Subject: Bluetooth: hci_bcm4377: do not mark valid bd_addr as invalid A recent commit restored the original (and still documented) semantics for the HCI_QUIRK_USE_BDADDR_PROPERTY quirk so that the device address is considered invalid unless an address is provided by firmware. This specifically means that this flag must only be set for devices with invalid addresses, but the Broadcom BCM4377 driver has so far been setting this flag unconditionally. Fortunately the driver already checks for invalid addresses during setup and sets the HCI_QUIRK_INVALID_BDADDR flag, which can simply be replaced with HCI_QUIRK_USE_BDADDR_PROPERTY to indicate that the default address is invalid but can be overridden by firmware (long term, this should probably just always be allowed). 
Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk") Cc: stable@vger.kernel.org # 6.5 Reported-by: Felix Zhang Link: https://lore.kernel.org/r/77419ffacc5b4875e920e038332575a2a5bff29f.camel@mrman314.tech/ Signed-off-by: Johan Hovold Reported-by: Felix Zhang Reviewed-by: Neal Gompa Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_bcm4377.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index a61757835695..9a7243d5db71 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377) bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr)) - set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks); + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks); kfree_skb(skb); return 0; @@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) hdev->set_bdaddr = bcm4377_hci_set_bdaddr; hdev->setup = bcm4377_hci_setup; - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (bcm4377->hw->broken_mws_transport_config) set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks); if (bcm4377->hw->broken_ext_scan) -- cgit v1.2.3 From c0dbc56077ae759f2dd602c7561480bc2b1b712c Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 19 Jan 2024 17:45:30 +0800 Subject: Bluetooth: qca: Fix wrong event type for patch config command Vendor-specific command patch config has HCI_Command_Complete event as response, but qca_send_patch_config_cmd() wrongly expects vendor-specific event for the command, fixed by using right event type. 
The btmon log for the vendor-specific command is shown below: < HCI Command: Vendor (0x3f|0x0000) plen 5 28 01 00 00 00 > HCI Event: Command Complete (0x0e) plen 5 Vendor (0x3f|0x0000) ncmd 1 Status: Success (0x00) 28 Fixes: 4fac8a7ac80b ("Bluetooth: btqca: sequential validation") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index fdb0fae88d1c..b40b32fa7f1c 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); -- cgit v1.2.3 From 7dcd3e014aa7faeeaf4047190b22d8a19a0db696 Mon Sep 17 00:00:00 2001 From: Janaki Ramaiah Thota Date: Wed, 24 Jan 2024 20:00:42 +0530 Subject: Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT The BT adapter goes into the UNCONFIGURED state during BT turn-on when the devicetree has no local-bd-address node, so Bluetooth will not work out of the box on such devices. To avoid this problem, add a check that sets HCI_QUIRK_USE_BDADDR_PROPERTY only when the local-bd-address node entry is present. When this quirk is not set, the public Bluetooth address read by the host from the controller through the HCI Read BD Address command is considered valid.
Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Signed-off-by: Janaki Ramaiah Thota Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 94b8c406f0c0..06193546ebb6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1904,7 +1905,17 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); -- cgit v1.2.3 From 6abf9dd26bb1699c17d601b9a292577d01827c0e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 26 Jan 2024 17:00:24 +0800 Subject: Bluetooth: qca: Fix triggering coredump implementation hci_coredump_qca() uses __hci_cmd_sync() to send a vendor-specific command to trigger firmware coredump, but the command does not have any event as its sync response, so it is not suitable to use __hci_cmd_sync(), fixed by using __hci_cmd_send(). 
Fixes: 06d3fdfcdf5c ("Bluetooth: hci_qca: Add qcom devcoredump support") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 06193546ebb6..edd2a81b4d5e 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1807,13 +1807,12 @@ static int qca_power_on(struct hci_dev *hdev) static void hci_coredump_qca(struct hci_dev *hdev) { + int err; static const u8 param[] = { 0x26 }; - struct sk_buff *skb; - skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - bt_dev_err(hdev, "%s: trigger crash failed (%ld)", __func__, PTR_ERR(skb)); - kfree_skb(skb); + err = __hci_cmd_send(hdev, 0xfc0c, 1, param); + if (err < 0) + bt_dev_err(hdev, "%s: trigger crash failed (%d)", __func__, err); } static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id) -- cgit v1.2.3 From 2a93c6cbd5a703d44c414a3c3945a87ce11430ba Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 26 Feb 2024 17:49:57 -0800 Subject: pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation Commit 'e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain")' aimed to make sure that a power-domain that is being enabled without any particular performance-state requested will at least turn the rail on, to avoid filling DeviceTree with otherwise unnecessary required-opps properties. But in the event that aggregation happens on a disabled power-domain, with an enabled peer without performance-state, both the local and peer corner are 0. The peer's enabled_corner is not considered, with the result that the underlying (shared) resource is disabled. 
One case where this can be observed is when the display stack keeps mmcx enabled (but without a particular performance-state vote) in order to access registers and sync_state happens in the rpmhpd driver. As mmcx_ao is flushed the state of the peer (mmcx) is not considered and mmcx_ao ends up turning off "mmcx.lvl" underneath mmcx. This has been observed several times, but has been painted over in DeviceTree by adding an explicit vote for the lowest non-disabled performance-state. Fixes: e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain") Reported-by: Johan Hovold Closes: https://lore.kernel.org/linux-arm-msm/ZdMwZa98L23mu3u6@hovoldconsulting.com/ Cc: Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240226-rpmhpd-enable-corner-fix-v1-1-68c004cec48c@quicinc.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/qcom/rpmhpd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c index 3078896b1300..47df910645f6 100644 --- a/drivers/pmdomain/qcom/rpmhpd.c +++ b/drivers/pmdomain/qcom/rpmhpd.c @@ -692,6 +692,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; if (pd->state_synced) { to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); @@ -701,9 +702,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) this_sleep_corner = pd->level_count - 1; } - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, 
peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); -- cgit v1.2.3 From fc9a615200d48e076af58f4309f507e500ed900d Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 28 Feb 2024 13:27:20 +0800 Subject: drm: tests: Fix invalid printf format specifiers in KUnit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_buddy_test's alloc_contiguous test used a u64 for the page size, which was then updated to be an 'unsigned long' to avoid 64-bit multiplication division helpers. However, the variable is logged by some KUNIT_ASSERT_EQ_MSG() using the '%d' or '%llu' format specifiers, the former of which is always wrong, and the latter is no longer correct now that ps is no longer a u64. Fix these to all use '%lu'. Also, drm_mm_test calls KUNIT_FAIL() with an empty string as the message. gcc and clang warns if a printf format string is empty, so give these some more detailed error messages, which should be more useful anyway. 
Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") Fixes: fca7526b7d89 ("drm/tests/drm_buddy: fix build failure on 32-bit targets") Fixes: fc8d29e298cf ("drm: selftest: convert drm_mm selftest to KUnit") Reviewed-by: Matthew Auld Acked-by: Christian König Tested-by: Guenter Roeck Reviewed-by: Justin Stitt Signed-off-by: David Gow Signed-off-by: Shuah Khan --- drivers/gpu/drm/tests/drm_buddy_test.c | 14 +++++++------- drivers/gpu/drm/tests/drm_mm_test.c | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index 2f32fb2f12e7..3dbfa3078449 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -55,30 +55,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, 0), - "buddy_alloc hit an error size=%u\n", + "buddy_alloc hit an error size=%lu\n", ps); } while (++i < n_pages); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%u\n", 3 * ps); + "buddy_alloc didn't error size=%lu\n", 3 * ps); drm_buddy_free_list(&mm, &middle); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%u\n", 3 * ps); + "buddy_alloc didn't error size=%lu\n", 3 * ps); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%u\n", 2 * ps); + "buddy_alloc didn't error size=%lu\n", 2 * ps); drm_buddy_free_list(&mm, &right); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%u\n", 3 * ps); + 
"buddy_alloc didn't error size=%lu\n", 3 * ps); /* * At this point we should have enough contiguous space for 2 blocks, * however they are never buddies (since we freed middle and right) so @@ -87,13 +87,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%u\n", 2 * ps); + "buddy_alloc hit an error size=%lu\n", 2 * ps); drm_buddy_free_list(&mm, &left); KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%u\n", 3 * ps); + "buddy_alloc hit an error size=%lu\n", 3 * ps); total = 0; list_for_each_entry(block, &allocated, link) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 1eb0c304f960..f37c0d765865 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -157,7 +157,7 @@ static void drm_test_mm_init(struct kunit *test) /* After creation, it should all be one massive hole */ if (!assert_one_hole(test, &mm, 0, size)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm not one hole on creation"); goto out; } @@ -171,14 +171,14 @@ static void drm_test_mm_init(struct kunit *test) /* After filling the range entirely, there should be no holes */ if (!assert_no_holes(test, &mm)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm has holes when filled"); goto out; } /* And then after emptying it again, the massive hole should be back */ drm_mm_remove_node(&tmp); if (!assert_one_hole(test, &mm, 0, size)) { - KUNIT_FAIL(test, ""); + KUNIT_FAIL(test, "mm does not have single hole after emptying"); goto out; } -- cgit v1.2.3 From b7cdccc6a849568775f738b1e233f751a8fed013 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Wed, 28 Feb 2024 11:39:21 -0700 Subject: drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP 
panels' ext caps don't write initial values. The value of dpcd_addr (0x317) can be random and the backlight control interface will be incorrect. [HOW] Add new panel patches to remove sink ext caps. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org # 6.5.x Cc: Tsung-hua Lin Cc: Chris Chi Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Ryan Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 85b7f58a7f35..c27063305a13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A): + case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1): DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; @@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->edid_hdmi = connector->display_info.is_hdmi; + apply_edid_quirks(edid_buf, edid_caps); + sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) return result; @@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; - apply_edid_quirks(edid_buf, edid_caps); - kfree(sads); kfree(sadb); -- cgit v1.2.3 From 1221b9e982e181f1c37789c46fe5bfe32d97bec4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2024 15:55:38 +0800 
Subject: ublk: improve getting & putting ublk device Firstly convert get_device() and put_device() into ublk_get_device() and ublk_put_device(). Secondly annotate ublk_get_device() & ublk_put_device() as noinline for trace, especially it is often to trigger device deletion hang when incorrect order is used on ublkc mmap, ublkc close, io_uring_sqe_unregister_file, ublkb close. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240223075539.89945-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 01afe90a47ac..06d88d2008ba 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -605,14 +605,16 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_NEED_GET_DATA; } -static struct ublk_device *ublk_get_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub) { if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) return ub; return NULL; } -static void ublk_put_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline void ublk_put_device(struct ublk_device *ub) { put_device(&ub->cdev_dev); } @@ -671,7 +673,7 @@ static void ublk_free_disk(struct gendisk *disk) struct ublk_device *ub = disk->private_data; clear_bit(UB_STATE_USED, &ub->state); - put_device(&ub->cdev_dev); + ublk_put_device(ub); } static void ublk_store_owner_uid_gid(unsigned int *owner_uid, @@ -2142,7 +2144,7 @@ static void ublk_remove(struct ublk_device *ub) cancel_work_sync(&ub->stop_work); cancel_work_sync(&ub->quiesce_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); - put_device(&ub->cdev_dev); + ublk_put_device(ub); ublks_added--; } @@ -2235,7 +2237,7 @@ static int 
ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (ub->nr_privileged_daemon != ub->nr_queues_ready) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); - get_device(&ub->cdev_dev); + ublk_get_device(ub); ub->dev_info.state = UBLK_S_DEV_LIVE; if (ublk_dev_is_zoned(ub)) { -- cgit v1.2.3 From 13fe8e6825e44129b6cbeee41d3012554bf8d687 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2024 15:55:39 +0800 Subject: ublk: add UBLK_CMD_DEL_DEV_ASYNC The current command UBLK_CMD_DEL_DEV won't return until the device is released, this way looks more reliable, but makes userspace more difficult to implement, especially about orders: unmap command buffer(which holds one ublkc reference), ublkc close, io_uring_file_unregister, ublkb close. Add UBLK_CMD_DEL_DEV_ASYNC so that device deletion won't wait release, then userspace needn't worry about the above order. Actually both loop and nbd is deleted in this async way. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240223075539.89945-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 9 ++++++--- include/uapi/linux/ublk_cmd.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 06d88d2008ba..bea3d5cf8a83 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2468,7 +2468,7 @@ static inline bool ublk_idr_freed(int id) return ptr == NULL; } -static int ublk_ctrl_del_dev(struct ublk_device **p_ub) +static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait) { struct ublk_device *ub = *p_ub; int idx = ub->ub_number; @@ -2502,7 +2502,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) * - the device number is freed already, we will not find this * device via ublk_get_device_from_id() */ - if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) + if (wait && wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) return -EINTR; return 
0; } @@ -2901,7 +2901,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_add_dev(cmd); break; case UBLK_CMD_DEL_DEV: - ret = ublk_ctrl_del_dev(&ub); + ret = ublk_ctrl_del_dev(&ub, true); + break; + case UBLK_U_CMD_DEL_DEV_ASYNC: + ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: ret = ublk_ctrl_get_queue_affinity(ub, cmd); diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index b9cfc5c96268..c8dc5f8ea699 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -49,6 +49,8 @@ _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_GET_FEATURES \ _IOR('u', 0x13, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_DEL_DEV_ASYNC \ + _IOR('u', 0x14, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of -- cgit v1.2.3 From 0bb7b09392eb74b152719ae87b1ba5e4bf910ef0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Feb 2024 10:49:41 -0800 Subject: igb: extend PTP timestamp adjustments to i211 The i211 requires the same PTP timestamp adjustments as the i210, according to its datasheet. To ensure consistent timestamping across different platforms, this change extends the existing adjustments to include the i211. The adjustment results are tested and comparable for i210- and i211-based systems.
Fixes: 3f544d2a4d5c ("igb: adjust PTP timestamps for Tx/Rx latency") Signed-off-by: Oleksij Rempel Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240227184942.362710-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 319c544b9f04..f94570556120 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -957,7 +957,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1003,6 +1003,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1022,7 +1023,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; -- cgit v1.2.3 From 7e10d87e63f7f9c324d533bb4369e35bb19ab9a9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 Feb 2024 14:30:18 +0100 Subject: drm/xe: Add uapi for dumpable bos MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Add the flag XE_VM_BIND_FLAG_DUMPABLE to notify devcoredump that this mapping should be dumped. This is not hooked up, but the uapi should be ready before merging. It's likely easier to dump the contents of the bo's at devcoredump readout time, so it's better if the bos will stay unmodified after a hang. The NEEDS_CPU_MAPPING flag is removed as requirement. Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240221133024.898315-3-maarten.lankhorst@linux.intel.com (cherry picked from commit 76a86b58d2b3de31e88acb487ebfa0c3cc7c41d2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 921ca28d49dd..945c89b5e4b5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2722,7 +2722,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define SUPPORTED_FLAGS \ (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL) + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ + DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6d11ee9e571a..4f010949f812 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -931,6 +931,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From eaa367a0317ea4cbc7aa60f25829c89c0e12717b Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: drm/xe/uapi: 
Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com (cherry picked from commit 84a1ed5e67565b09b8fd22a26754d2897de55ce0) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec_queue.c | 88 ++------------------------------ drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 ---- drivers/gpu/drm/xe/xe_lrc.c | 10 +--- drivers/gpu/drm/xe/xe_vm.c | 12 +---- drivers/gpu/drm/xe/xe_vm_types.h | 4 -- include/uapi/drm/xe_drm.h | 19 ------- 6 files changed, 6 insertions(+), 137 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 3acfd4f07666..49223026c89f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -309,85 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return q->ops->set_timeslice(q, value); } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, 
&min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_preempt_timeout(q, value); -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_job_timeout(q, value); -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -395,11 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - 
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -418,7 +334,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 947bbc4b285d..36f4901d8d7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -150,16 +150,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 0ec5ad2539f1..b38319d2801e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int 
xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -754,13 +752,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 945c89b5e4b5..1d82616aa935 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2117,10 +2117,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { @@ -2313,8 +2309,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? 
VMA_CREATE_FLAG_IS_NULL : 0; @@ -2445,7 +2439,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2720,9 +2714,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a603cc2eb56b..0f220b5d2e7b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -288,10 +288,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @pat_index: The pat index to use for this operation. */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index a7274a99d456..bb0c8a994116 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -831,10 +831,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. 
In the future, the NULL flags @@ -927,8 +923,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1045,19 +1039,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From dc15bd0aa7b5ba77bb216394b368c6f9aedbf2f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:19 -0800 Subject: drm/xe: Fix execlist splat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although execlist submission is not supported it should be kept in a basic working state as it can be used for very early hardware bring up. Fix the below splat. 
WARNING: CPU: 3 PID: 11 at drivers/gpu/drm/xe/xe_execlist.c:217 execlist_run_job+0x1c2/0x220 [xe] Modules linked in: xe drm_kunit_helpers drm_gpuvm drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_buddy gpu_sched mei_pxp mei_hdcp wmi_bmof x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core video snd_pcm mei_me mei wmi fuse e1000e i2c_i801 ptp i2c_smbus pps_core intel_lpss_pci CPU: 3 PID: 11 Comm: kworker/u16:0 Tainted: G U 6.8.0-rc3-guc+ #1046 Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 Workqueue: rcs0 drm_sched_run_job_work [gpu_sched] RIP: 0010:execlist_run_job+0x1c2/0x220 [xe] Code: 8b f8 03 00 00 4c 89 39 e9 e2 fe ff ff 49 8d 7d 20 be ff ff ff ff e8 ed fd a6 e1 85 c0 0f 85 e1 fe ff ff 0f 0b e9 da fe ff ff <0f> 0b 0f 0b 41 83 fc 03 0f 86 8a fe ff ff 0f 0b e9 83 fe ff ff be RSP: 0018:ffffc9000013bdb8 EFLAGS: 00010246 RAX: ffff888105021a00 RBX: ffff888105078400 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffc9000013bd14 RDI: ffffc90001609090 RBP: ffff88811e3f0040 R08: 0000000000000088 R09: 00000000ffffff81 R10: 0000000000000001 R11: ffff88810c10c000 R12: 00000000fffffffe R13: ffff888109b72c28 R14: ffff8881050784a0 R15: ffff888105078408 FS: 0000000000000000(0000) GS:ffff88849f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563459d130f8 CR3: 000000000563a001 CR4: 0000000000f70ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x7f/0x170 ? execlist_run_job+0x1c2/0x220 [xe] ? report_bug+0x1c7/0x1d0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? execlist_run_job+0x1c2/0x220 [xe] ? execlist_run_job+0x2c/0x220 [xe] drm_sched_run_job_work+0x246/0x3f0 [gpu_sched] ? 
process_one_work+0x18d/0x4e0 process_one_work+0x1f7/0x4e0 worker_thread+0x1da/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x130 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 9b9529ce379a ("drm/xe: Rename engine to exec_queue") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-2-matthew.brost@intel.com (cherry picked from commit ddadc7120d4be7a40a9745924339c472c5850d14) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 42d01bbbf7d0..acb4d9f38fd7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); -- cgit v1.2.3 From ccff0b21ebe0cbe3f402edb27b0b1fd22a9d08aa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:21 -0800 Subject: drm/xe: Don't support execlists in xe_gt_tlb_invalidation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_gt_tlb_invalidation layer implements TLB invalidations for a GuC backend. Simply return if in execlists mode. A follow up may properly implement the xe_gt_tlb_invalidation layer for both GuC and execlists. 
Fixes: a9351846d945 ("drm/xe: Break of TLB invalidation into its own file") Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-4-matthew.brost@intel.com (cherry picked from commit a9e483dda3efa5b9aae5d9eef94d2c3a878d9bea) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 7eef23a00d77..f4c485289dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct drm_printer p = drm_err_printer(__func__); int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order -- cgit v1.2.3 From a41f6b0db58fe3cc2686e4065db48ebf44effa36 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Feb 2024 07:55:54 -0800 Subject: drm/xe: Use vmalloc for array of bind allocation in bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vmalloc in an effort to allow a user to pass in a large number of binds in an IOCTL (mesa use case). Also use array allocations rather than open coding the size calculation.
v2: Use __GFP_ACCOUNT for allocations (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240226155554.103384-1-matthew.brost@intel.com (cherry picked from commit 35ed1d2bfff7b1969e7f99f3641a83ea54f037e2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1d82616aa935..041b29439c4b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2740,8 +2740,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2831,7 +2832,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2919,13 +2920,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3066,10 +3069,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 
1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3093,10 +3096,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } -- cgit v1.2.3 From 14d4d0ad0ab5aa980cf71a82da1297b28b274de1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Feb 2024 16:53:53 -0800 Subject: drm/xe: get rid of MAX_BINDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa has been issuing a single bind operation per ioctl since xe.ko changed to GPUVA due to xe.ko bug #746. If I change Mesa to try again to issue every single bind operation it can in the same ioctl, it hits the MAX_BINDS assertion when running Vulkan conformance tests. Test dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 issues 960 bind operations in a single ioctl, it's the most I could find in the conformance suite. I don't see a reason to keep the MAX_BINDS restriction: it doesn't seem to be preventing any specific issue. If the number is too big for the memory allocations, then those will fail. Nothing related to num_binds seems to be using the stack. Let's just get rid of it.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Testcase: dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746 Cc: Matthew Brost Signed-off-by: Paulo Zanoni Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240215005353.1295420-1-paulo.r.zanoni@intel.com (cherry picked from commit ba6bbdc6eaef92998ec7f323c9e1211d344d2556) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 041b29439c4b..75b44777067e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2719,8 +2719,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2732,8 +2730,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { -- cgit v1.2.3 From 12cb2b21c2d037a4299028fc56ac941185992e5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:37 +0100 Subject: drm/xe/mmio: fix build warning for BAR resize on 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang complains about a nonsensical test on builds with a 32-bit phys_addr_t, which means resizing will always fail: drivers/gpu/drm/xe/xe_mmio.c:109:23: error: result of comparison of constant 
4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 109 | root_res->start > 0x100000000ull) | ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Previously, BAR resize was always disallowed on 32-bit kernels, but this apparently changed recently. Since 32-bit machines can in theory support PAE/LPAE for large address spaces, this may end up useful, so change the driver to shut up the warning but still work when phys_addr_t/resource_size_t is 64 bit wide. Fixes: 9a6e6c14bfde ("drm/xe/mmio: Use non-atomic writeq/readq variant for 32b") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-2-arnd@kernel.org Signed-off-by: Lucas De Marchi (cherry picked from commit f5d3983366c0b88ec388b3407b29c1c0862ee2b8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5f6b53ea5528..02f7808f28ca 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } -- cgit v1.2.3 From a09946a9a903e809abab9e0fb813dbf5a32084f5 Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Tue, 20 Feb 2024 10:17:48 +0530 Subject: drm/xe/xe_bo_move: Enhance xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhanced xe_bo_move trace to be more readable. It will help to show the migration details. Src and dst details. v2: Modify trace_xe_bo_move(), it takes the integer mem_type rather than a string. 
Make mem_type_to_name() extern, it will be used by trace. (Thomas) v3: Move mem_type_to_name() to xe_bo.[ch] (Thomas, Matt) v4: Add device details to reduce ambiguity related to vram0/vram1. (Oak) v5: Rename mem_type_to_name to xe_mem_type_to_name. (Thomas) v6: Optimised code to use xe_bo_device(__entry->bo). (Thomas) Cc: Thomas Hellström Cc: Oak Zeng Cc: Kempczynski Zbigniew Cc: Matthew Brost Cc: Brian Welty Signed-off-by: Priyanka Dandamudi Reviewed-by: Oak Zeng Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240220044748.948496-1-priyanka.dandamudi@intel.com (cherry picked from commit a0df2cc858c309a8bc2e87b4274772587aa25e05) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_drm_client.c | 12 ++---------- drivers/gpu/drm/xe/xe_trace.h | 25 ++++++++++++++++++++++--- 4 files changed, 34 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..f2ea188663ac 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -28,6 +28,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [XE_PL_STOLEN] = "stolen" +}; + static const struct ttm_place sys_placement_flags = { .fpfn = 0, .lpfn = 0, @@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - - trace_xe_bo_move(bo); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..8be42ac6cd07 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++
b/drivers/gpu/drm/xe/xe_bo.h @@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); extern struct ttm_device_funcs xe_ttm_funcs; +extern const char *const xe_mem_type_to_name[]; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 82d1305e831f..6040e4d22b28 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo, static void show_meminfo(struct drm_printer *p, struct drm_file *file) { - static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { - [XE_PL_SYSTEM] = "system", - [XE_PL_TT] = "gtt", - [XE_PL_VRAM0] = "vram0", - [XE_PL_VRAM1] = "vram1", - [4 ... 6] = NULL, - [XE_PL_STOLEN] = "stolen" - }; struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; struct xe_file *xef = file->driver_priv; struct ttm_device *bdev = &xef->xe->ttm; @@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) spin_unlock(&client->bos_lock); for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!mem_type_to_name[mem_type]) + if (!xe_mem_type_to_name[mem_type]) continue; man = ttm_manager_type(bdev, mem_type); @@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 
0 : DRM_GEM_OBJECT_PURGEABLE), - mem_type_to_name[mem_type]); + xe_mem_type_to_name[mem_type]); } } } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..0cce98a6b14b 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,6 +12,7 @@ #include #include +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" @@ -100,9 +101,27 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); -DEFINE_EVENT(xe_bo, xe_bo_move, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), + TP_ARGS(bo, new_placement, old_placement), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + ), + TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); DECLARE_EVENT_CLASS(xe_exec_queue, -- cgit v1.2.3 From 4ca5c82988e73f51587e2d7564d44f99429c111a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Feb 2024 06:41:24 -0800 Subject: drm/xe: Use pointers in trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a0df2cc858c3 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") inadvertently reverted commit 8d038f49c1f3 ("drm/xe: Fix cast on trace variable"), breaking the build on 32bits. As noted by Ville, there's no point in converting the pointers to u64 and add casts everywhere. 
In fact, it's better to just use %p and let the address be hashed. Convert all the cases in xe_trace.h to use pointers. Cc: Ville Syrjälä Cc: Matt Roper Cc: Priyanka Dandamudi Cc: Oak Zeng Cc: Thomas Hellström Signed-off-by: Lucas De Marchi Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240222144125.2862546-1-lucas.demarchi@intel.com (cherry picked from commit 7a975748d4dc0a524c99a390c6f74b7097ef8cf7) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_trace.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 0cce98a6b14b..3b97633d81d8 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -27,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_ARGS(fence), TP_STRUCT__entry( - __field(u64, fence) + __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=0x%016llx, seqno=%d", + TP_printk("fence=%p, seqno=%d", __entry->fence, __entry->seqno) ); @@ -83,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo, TP_STRUCT__entry( __field(size_t, size) __field(u32, flags) - __field(u64, vm) + __field(struct xe_vm *, vm) ), TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (unsigned long)bo->vm; + __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=%p", __entry->size, __entry->flags, __entry->vm) ); @@ -346,16 +346,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_STRUCT__entry( __field(u64, ctx) __field(u32, seqno) - __field(u64, fence) + __field(struct xe_hw_fence *, fence) ), TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (unsigned long)fence; + __entry->fence = fence; ), - 
TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", __entry->ctx, __entry->fence, __entry->seqno) ); @@ -384,7 +384,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( - __field(u64, vma) + __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -392,14 +392,14 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (unsigned long)vma; + __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -484,16 +484,16 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( - __field(u64, vm) + __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( - __entry->vm = (unsigned long)vm; + __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + TP_printk("vm=%p, asid=0x%05x", __entry->vm, __entry->asid) ); -- cgit v1.2.3 From 86b3cd6d0713b3b1cb4e17dbddd4d4a2bff98d60 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:51 +0200 Subject: drm/xe: Expose user fence from xe_sync_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing getting reference to user fence, we can control the lifetime outside of sync entries. This is needed to allow vma to track the associated user fence that was provided with bind ioctl. 
v2: xe_user_fence can be kept opaque (Jani, Matt) v3: indent fix (Matt) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-2-mika.kuoppala@linux.intel.com (cherry picked from commit 977e5b82e0901480bc201342d39f855fc0a2ef47) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_sync.c | 58 +++++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_sync.h | 4 +++ drivers/gpu/drm/xe/xe_sync_types.h | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ 
-69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. 
+ * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; -- cgit v1.2.3 From 785f4cc0689f32ab615f043d7889d17eb4f37061 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:52 +0200 Subject: drm/xe: Deny unbinds if uapi ufence pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If user fence was provided for MAP in vm_bind_ioctl and it has still not been signalled, deny UNMAP of said vma with EBUSY as long as 
unsignalled fence exists. This guarantees that MAP vs UNMAP sequences won't escape under the radar if we ever want to track the client's state with respect to completed and accessible MAPs, by means of intercepting the ufence release signalling. v2: find ufence with num_fences > 1 (Matt) v3: careful on clearing vma ufence (Matt) Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1159 Cc: Thomas Hellström Cc: Matthew Brost Cc: Joonas Lahtinen Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-3-mika.kuoppala@linux.intel.com (cherry picked from commit 158900ade92cce5ab85a06d618eb51e6c7ffb28a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 44 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 75b44777067e..3b21afe5b488 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -897,6 +897,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1608,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1741,6 +1756,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i <
num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1748,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 0f220b5d2e7b..7300eea5394b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -104,6 +105,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** -- cgit v1.2.3 From 8188cae3cc3d8018ec97ca9ab8caa3acc69a056d Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Wed, 21 Feb 2024 15:49:50 +0530 Subject: drm/xe/xe_trace: Add move_lacks_source detail to xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add move_lacks_source detail to xe_bo_move trace to make it readable that is to check if it is migrate clear or migrate copy. 
Cc: Thomas Hellström Signed-off-by: Priyanka Dandamudi Reviewed-by: Thomas Hellström Fixes: a09946a9a903 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240221101950.1019312-1-priyanka.dandamudi@intel.com (cherry picked from commit 8034f6b070cc3716e81b1846f8a4ca5339c3f29b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f2ea188663ac..4d3b80ec906d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -721,7 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 3b97633d81d8..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -102,14 +102,16 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, ); TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), - TP_ARGS(bo, new_placement, old_placement), + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) __field(u32, new_placement) __field(u32, old_placement) __array(char, device_id, 12) + __field(bool, move_lacks_source) ), TP_fast_assign( @@ -118,9 +120,11 @@ TRACE_EVENT(xe_bo_move, __entry->new_placement = new_placement; __entry->old_placement = old_placement; 
strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; ), - TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); -- cgit v1.2.3 From 01bb1ae35006e473138c90711bad1a6b614a1823 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 19 Feb 2024 13:50:47 +0100 Subject: drm/i915: Check before removing mm notifier Error in mmu_interval_notifier_insert() can leave a NULL notifier.mm pointer. Catch that and return early. Fixes: ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.") Cc: # v5.13+ [tursulin: Added Fixes and cc stable.] 
Cc: Andi Shyti Cc: Shawn Lee Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240219125047.28906-1-nirmoy.das@intel.com Signed-off-by: Tvrtko Ursulin (cherry picked from commit db7bbd13f08774cde0332c705f042e327fe21e73) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1d3ebdf4069b..c08b67593565 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -379,6 +379,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } -- cgit v1.2.3 From 616d82c3cfa2a2146dd7e3ae47bda7e877ee549e Mon Sep 17 00:00:00 2001 From: Alexander Ofitserov Date: Wed, 28 Feb 2024 14:47:03 +0300 Subject: gtp: fix use-after-free and null-ptr-deref in gtp_newlink() The gtp_link_ops operations structure for the subsystem must be registered after registering the gtp_net_ops pernet operations structure. 
Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: [ 1010.702740] gtp: GTP module unloaded [ 1010.715877] general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] SMP KASAN NOPTI [ 1010.715888] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] [ 1010.715895] CPU: 1 PID: 128616 Comm: a.out Not tainted 6.8.0-rc6-std-def-alt1 #1 [ 1010.715899] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 [ 1010.715908] RIP: 0010:gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.715915] Code: 80 3c 02 00 0f 85 41 04 00 00 48 8b bb d8 05 00 00 e8 ed f6 ff ff 48 89 c2 48 89 c5 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 4f 04 00 00 4c 89 e2 4c 8b 6d 00 48 b8 00 00 00 [ 1010.715920] RSP: 0018:ffff888020fbf180 EFLAGS: 00010203 [ 1010.715929] RAX: dffffc0000000000 RBX: ffff88800399c000 RCX: 0000000000000000 [ 1010.715933] RDX: 0000000000000001 RSI: ffffffff84805280 RDI: 0000000000000282 [ 1010.715938] RBP: 000000000000000d R08: 0000000000000001 R09: 0000000000000000 [ 1010.715942] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88800399cc80 [ 1010.715947] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000400 [ 1010.715953] FS: 00007fd1509ab5c0(0000) GS:ffff88805b300000(0000) knlGS:0000000000000000 [ 1010.715958] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1010.715962] CR2: 0000000000000000 CR3: 000000001c07a000 CR4: 0000000000750ee0 [ 1010.715968] PKRU: 55555554 [ 1010.715972] Call Trace: [ 1010.715985] ? __die_body.cold+0x1a/0x1f [ 1010.715995] ? die_addr+0x43/0x70 [ 1010.716002] ? exc_general_protection+0x199/0x2f0 [ 1010.716016] ? asm_exc_general_protection+0x1e/0x30 [ 1010.716026] ? gtp_newlink+0x4d7/0x9c0 [gtp] [ 1010.716034] ? gtp_net_exit+0x150/0x150 [gtp] [ 1010.716042] __rtnl_newlink+0x1063/0x1700 [ 1010.716051] ? rtnl_setlink+0x3c0/0x3c0 [ 1010.716063] ? is_bpf_text_address+0xc0/0x1f0 [ 1010.716070] ? 
kernel_text_address.part.0+0xbb/0xd0 [ 1010.716076] ? __kernel_text_address+0x56/0xa0 [ 1010.716084] ? unwind_get_return_address+0x5a/0xa0 [ 1010.716091] ? create_prof_cpu_mask+0x30/0x30 [ 1010.716098] ? arch_stack_walk+0x9e/0xf0 [ 1010.716106] ? stack_trace_save+0x91/0xd0 [ 1010.716113] ? stack_trace_consume_entry+0x170/0x170 [ 1010.716121] ? __lock_acquire+0x15c5/0x5380 [ 1010.716139] ? mark_held_locks+0x9e/0xe0 [ 1010.716148] ? kmem_cache_alloc_trace+0x35f/0x3c0 [ 1010.716155] ? __rtnl_newlink+0x1700/0x1700 [ 1010.716160] rtnl_newlink+0x69/0xa0 [ 1010.716166] rtnetlink_rcv_msg+0x43b/0xc50 [ 1010.716172] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716179] ? lock_acquire+0x1fe/0x560 [ 1010.716188] ? netlink_deliver_tap+0x12f/0xd50 [ 1010.716196] netlink_rcv_skb+0x14d/0x440 [ 1010.716202] ? rtnl_fdb_dump+0x9f0/0x9f0 [ 1010.716208] ? netlink_ack+0xab0/0xab0 [ 1010.716213] ? netlink_deliver_tap+0x202/0xd50 [ 1010.716220] ? netlink_deliver_tap+0x218/0xd50 [ 1010.716226] ? __virt_addr_valid+0x30b/0x590 [ 1010.716233] netlink_unicast+0x54b/0x800 [ 1010.716240] ? netlink_attachskb+0x870/0x870 [ 1010.716248] ? __check_object_size+0x2de/0x3b0 [ 1010.716254] netlink_sendmsg+0x938/0xe40 [ 1010.716261] ? netlink_unicast+0x800/0x800 [ 1010.716269] ? __import_iovec+0x292/0x510 [ 1010.716276] ? netlink_unicast+0x800/0x800 [ 1010.716284] __sock_sendmsg+0x159/0x190 [ 1010.716290] ____sys_sendmsg+0x712/0x880 [ 1010.716297] ? sock_write_iter+0x3d0/0x3d0 [ 1010.716304] ? __ia32_sys_recvmmsg+0x270/0x270 [ 1010.716309] ? lock_acquire+0x1fe/0x560 [ 1010.716315] ? drain_array_locked+0x90/0x90 [ 1010.716324] ___sys_sendmsg+0xf8/0x170 [ 1010.716331] ? sendmsg_copy_msghdr+0x170/0x170 [ 1010.716337] ? lockdep_init_map_type+0x2c7/0x860 [ 1010.716343] ? lockdep_hardirqs_on_prepare+0x430/0x430 [ 1010.716350] ? debug_mutex_init+0x33/0x70 [ 1010.716360] ? percpu_counter_add_batch+0x8b/0x140 [ 1010.716367] ? lock_acquire+0x1fe/0x560 [ 1010.716373] ? find_held_lock+0x2c/0x110 [ 1010.716384] ? 
__fd_install+0x1b6/0x6f0 [ 1010.716389] ? lock_downgrade+0x810/0x810 [ 1010.716396] ? __fget_light+0x222/0x290 [ 1010.716403] __sys_sendmsg+0xea/0x1b0 [ 1010.716409] ? __sys_sendmsg_sock+0x40/0x40 [ 1010.716419] ? lockdep_hardirqs_on_prepare+0x2b3/0x430 [ 1010.716425] ? syscall_enter_from_user_mode+0x1d/0x60 [ 1010.716432] do_syscall_64+0x30/0x40 [ 1010.716438] entry_SYSCALL_64_after_hwframe+0x62/0xc7 [ 1010.716444] RIP: 0033:0x7fd1508cbd49 [ 1010.716452] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ef 70 0d 00 f7 d8 64 89 01 48 [ 1010.716456] RSP: 002b:00007fff18872348 EFLAGS: 00000202 ORIG_RAX: 000000000000002e [ 1010.716463] RAX: ffffffffffffffda RBX: 000055f72bf0eac0 RCX: 00007fd1508cbd49 [ 1010.716468] RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000006 [ 1010.716473] RBP: 00007fff18872360 R08: 00007fff18872360 R09: 00007fff18872360 [ 1010.716478] R10: 00007fff18872360 R11: 0000000000000202 R12: 000055f72bf0e1b0 [ 1010.716482] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 1010.716491] Modules linked in: gtp(+) udp_tunnel ib_core uinput af_packet rfkill qrtr joydev hid_generic usbhid hid kvm_intel iTCO_wdt intel_pmc_bxt iTCO_vendor_support kvm snd_hda_codec_generic ledtrig_audio irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel snd_hda_intel nls_utf8 snd_intel_dspcfg nls_cp866 psmouse aesni_intel vfat crypto_simd fat cryptd glue_helper snd_hda_codec pcspkr snd_hda_core i2c_i801 snd_hwdep i2c_smbus xhci_pci snd_pcm lpc_ich xhci_pci_renesas xhci_hcd qemu_fw_cfg tiny_power_button button sch_fq_codel vboxvideo drm_vram_helper drm_ttm_helper ttm vboxsf vboxguest snd_seq_midi snd_seq_midi_event snd_seq snd_rawmidi snd_seq_device snd_timer snd soundcore msr fuse efi_pstore dm_mod ip_tables x_tables autofs4 virtio_gpu virtio_dma_buf drm_kms_helper cec rc_core drm virtio_rng virtio_scsi 
rng_core virtio_balloon virtio_blk virtio_net virtio_console net_failover failover ahci libahci libata evdev scsi_mod input_leds serio_raw virtio_pci intel_agp [ 1010.716674] virtio_ring intel_gtt virtio [last unloaded: gtp] [ 1010.716693] ---[ end trace 04990a4ce61e174b ]--- Cc: stable@vger.kernel.org Signed-off-by: Alexander Ofitserov Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228114703.465107-1-oficerovas@altlinux.org Signed-off-by: Paolo Abeni --- drivers/net/gtp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 2129ae42c703..2b5357d94ff5 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1903,26 +1903,26 @@ static int __init gtp_init(void) get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(&gtp_link_ops); + err = register_pernet_subsys(&gtp_net_ops); if (err < 0) goto error_out; - err = register_pernet_subsys(&gtp_net_ops); + err = rtnl_link_register(&gtp_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; err = genl_register_family(&gtp_genl_family); if (err < 0) - goto unreg_pernet_subsys; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_pernet_subsys: - unregister_pernet_subsys(&gtp_net_ops); unreg_rtnl_link: rtnl_link_unregister(&gtp_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(&gtp_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; -- cgit v1.2.3 From d0b06dc48fb15902d7da09c5c0861e7f042a9381 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 29 Feb 2024 22:17:37 +0900 Subject: firewire: core: use long bus reset on gap count error When resetting the bus after a gap count error, use a long rather than short bus reset. IEEE 1394-1995 uses only long bus resets.
IEEE 1394a adds the option of short bus resets. When video or audio transmission is in progress and a device is hot-plugged elsewhere on the bus, the resulting bus reset can cause video frame drops or audio dropouts. Short bus resets reduce or eliminate this problem. Accordingly, short bus resets are almost always preferred. However, on a mixed 1394/1394a bus, a short bus reset can trigger an immediate additional bus reset. This double bus reset can be interpreted differently by different nodes on the bus, resulting in an inconsistent gap count after the bus reset. An inconsistent gap count will cause another bus reset, leading to a neverending bus reset loop. This only happens for some bus topologies, not for all mixed 1394/1394a buses. By instead sending a long bus reset after a gap count inconsistency, we avoid the doubled bus reset, restoring the bus to normal operation. Signed-off-by: Adam Goldman Link: https://sourceforge.net/p/linux1394/mailman/message/58741624/ Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 8aaa7fcb2630..401a77e3b5fa 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -500,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. 
Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } -- cgit v1.2.3 From f6443e0177a55f78e94ccc1a43eb63a023a0b6fd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Feb 2024 13:32:14 +0100 Subject: pinctrl: don't put the reference to GPIO device in pinctrl_pins_show() The call to gpiod_to_gpio_device() does not increase the reference count of the GPIO device struct so it must not be decreased. Remove the buggy __free() decorator. Fixes: 524fc108b895 ("pinctrl: stop using gpiod_to_chip()") Reported-by: David Arcari Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240223123214.288181-1-brgl@bgdev.pl Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index ee56856cb80c..bbcdece83bf4 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1644,7 +1644,7 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) const struct pinctrl_ops *ops = pctldev->desc->pctlops; unsigned int i, pin; #ifdef CONFIG_GPIOLIB - struct gpio_device *gdev __free(gpio_device_put) = NULL; + struct gpio_device *gdev = NULL; struct pinctrl_gpio_range *range; int gpio_num; #endif -- cgit v1.2.3 From 34b567868777e9fd39ec5333969728a7f0cf179c Mon Sep 17 00:00:00 2001 From: Fei Wu Date: Wed, 28 Feb 2024 19:54:25 +0800 Subject: perf: RISCV: Fix panic on pmu overflow handler (1 << idx) of int is not desired when setting bits in unsigned long overflowed_ctrs, use BIT() instead. This panic happens when running 'perf record -e branches' on sophgo sg2042. 
[ 273.311852] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 [ 273.320851] Oops [#1] [ 273.323179] Modules linked in: [ 273.326303] CPU: 0 PID: 1475 Comm: perf Not tainted 6.6.0-rc3+ #9 [ 273.332521] Hardware name: Sophgo Mango (DT) [ 273.336878] epc : riscv_pmu_ctr_get_width_mask+0x8/0x62 [ 273.342291] ra : pmu_sbi_ovf_handler+0x2e0/0x34e [ 273.347091] epc : ffffffff80aecd98 ra : ffffffff80aee056 sp : fffffff6e36928b0 [ 273.354454] gp : ffffffff821f82d0 tp : ffffffd90c353200 t0 : 0000002ade4f9978 [ 273.361815] t1 : 0000000000504d55 t2 : ffffffff8016cd8c s0 : fffffff6e3692a70 [ 273.369180] s1 : 0000000000000020 a0 : 0000000000000000 a1 : 00001a8e81800000 [ 273.376540] a2 : 0000003c00070198 a3 : 0000003c00db75a4 a4 : 0000000000000015 [ 273.383901] a5 : ffffffd7ff8804b0 a6 : 0000000000000015 a7 : 000000000000002a [ 273.391327] s2 : 000000000000ffff s3 : 0000000000000000 s4 : ffffffd7ff8803b0 [ 273.398773] s5 : 0000000000504d55 s6 : ffffffd905069800 s7 : ffffffff821fe210 [ 273.406139] s8 : 000000007fffffff s9 : ffffffd7ff8803b0 s10: ffffffd903f29098 [ 273.413660] s11: 0000000080000000 t3 : 0000000000000003 t4 : ffffffff8017a0ca [ 273.421022] t5 : ffffffff8023cfc2 t6 : ffffffd9040780e8 [ 273.426437] status: 0000000200000100 badaddr: 0000000000000098 cause: 000000000000000d [ 273.434512] [] riscv_pmu_ctr_get_width_mask+0x8/0x62 [ 273.441169] [] handle_percpu_devid_irq+0x98/0x1ee [ 273.447562] [] generic_handle_domain_irq+0x28/0x36 [ 273.454151] [] riscv_intc_irq+0x36/0x4e [ 273.459659] [] handle_riscv_irq+0x4a/0x74 [ 273.465442] [] do_irq+0x62/0x92 [ 273.470360] Code: 0420 60a2 6402 5529 0141 8082 0013 0000 0013 0000 (6d5c) b783 [ 273.477921] ---[ end trace 0000000000000000 ]--- [ 273.482630] Kernel panic - not syncing: Fatal exception in interrupt Reviewed-by: Alexandre Ghiti Reviewed-by: Atish Patra Signed-off-by: Fei Wu Link: https://lore.kernel.org/r/20240228115425.2613856-1-fei2.wu@intel.com Signed-off-by: Palmer Dabbelt --- 
drivers/perf/riscv_pmu_sbi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 16acd4dcdb96..452aab49db1e 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -512,7 +512,7 @@ static void pmu_sbi_set_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_reset_scounteren(void *arg) @@ -521,7 +521,7 @@ static void pmu_sbi_reset_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) @@ -731,14 +731,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* compute hardware counter index */ hidx = info->csr - CSR_CYCLE; /* check if the corresponding bit is set in sscountovf */ - if (!(overflow & (1 << hidx))) + if (!(overflow & BIT(hidx))) continue; /* * Keep a track of overflowed counters so that they can be started * with updated initial value. 
*/ - overflowed_ctrs |= 1 << lidx; + overflowed_ctrs |= BIT(lidx); hw_evt = &event->hw; riscv_pmu_event_update(event); perf_sample_data_init(&data, 0, hw_evt->last_period); -- cgit v1.2.3 From 640f41ed33b5a420e05daf395afae85e6b20c003 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:05:15 -0800 Subject: dpll: fix build failure due to rcu_dereference_check() on unknown type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasmiya reports that their compiler complains that we deref a pointer to unknown type with rcu_dereference_rtnl(): include/linux/rcupdate.h:439:9: error: dereferencing pointer to incomplete type ‘struct dpll_pin’ Unclear what compiler it is, at the moment, and we can't report but since DPLL can't be a module - move the code from the header into the source file. Fixes: 0d60d8df6f49 ("dpll: rely on rcu for netdev_dpll_pin()") Reported-by: Tasmiya Nalatwad Link: https://lore.kernel.org/all/3fcf3a2c-1c1b-42c1-bacb-78fdcd700389@linux.vnet.ibm.com/ Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240229190515.2740221-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 5 +++++ include/linux/dpll.h | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 4c2bb27c99fe..241db366b2c7 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,6 +42,11 @@ struct dpll_pin_registration { void *priv; }; +struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + struct dpll_device *dpll_device_get_by_id(int id) { if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 4ec2fe9caf5a..c60591308ae8 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -169,13 +169,13 @@ int dpll_device_change_ntf(struct dpll_device *dpll); int 
dpll_pin_change_ntf(struct dpll_pin *pin); +#if !IS_ENABLED(CONFIG_DPLL) static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) { -#if IS_ENABLED(CONFIG_DPLL) - return rcu_dereference_rtnl(dev->dpll_pin); -#else return NULL; -#endif } +#else +struct dpll_pin *netdev_dpll_pin(const struct net_device *dev); +#endif #endif -- cgit v1.2.3 From f7916c47f66d778817068d86e5c9b5e511e23c86 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 26 Feb 2024 17:16:10 +1000 Subject: nouveau: report byte usage in VRAM usage. Turns out usage is always in bytes not shifted. Fixes: 72fa02fdf833 ("nouveau: add an ioctl to report vram usage") Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index cd14f993bdd1..80f74ee0fc78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -269,7 +269,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) break; case NOUVEAU_GETPARAM_VRAM_USED: { struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); - getparam->value = (u64)ttm_resource_manager_usage(vram_mgr) << PAGE_SHIFT; + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); break; } default: -- cgit v1.2.3 From f6ecfdad359a01c7fd8a3bcfde3ef0acdf107e6e Mon Sep 17 00:00:00 2001 From: Sid Pranjale Date: Thu, 29 Feb 2024 21:52:05 +0530 Subject: drm/nouveau: keep DMA buffers required for suspend/resume Nouveau deallocates a few buffers post GPU init which are required for GPU suspend/resume to function correctly. This is likely not as big an issue on systems where the NVGPU is the only GPU, but on multi-GPU set ups it leads to a regression where the kernel module errors and results in a system-wide rendering freeze. 
This commit addresses that regression by moving the two buffers required for suspend and resume to be deallocated at driver unload instead of post init. Fixes: 042b5f83841fb ("drm/nouveau: fix several DMA buffer leaks") Signed-off-by: Sid Pranjale Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index a64c81385682..a73a5b589790 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1054,8 +1054,6 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) /* Release the DMA buffers that were needed only for boot and init */ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); nvkm_gsp_mem_dtor(gsp, &gsp->libos); - nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); - nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); return ret; } @@ -2163,6 +2161,8 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) r535_gsp_dtor_fws(gsp); + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); nvkm_gsp_mem_dtor(gsp, &gsp->loginit); nvkm_gsp_mem_dtor(gsp, &gsp->logintr); -- cgit v1.2.3 From 3a0f007b6979db6b5e0022d9edf4b61002be3e10 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:04 +0800 Subject: md: add a new helper rdev_has_badblock() The current API is_badblock() requires callers to pass in 'first_bad' and 'bad_sectors'; however, many callers just want to know if there are badblocks or not, and these callers must define two local variables that will never be used. Add a new helper rdev_has_badblock() that will only return if there are badblocks or not, remove unnecessary local variables and replace is_badblock() with the new helper in many places. There are no functional changes, and the new helper will also be used later to refactor read_balance().
Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-2-yukuai1@huaweicloud.com --- drivers/md/md.h | 10 ++++++++++ drivers/md/raid1.c | 26 +++++++------------------- drivers/md/raid10.c | 45 ++++++++++++++------------------------------- drivers/md/raid5.c | 35 +++++++++++++---------------------- 4 files changed, 44 insertions(+), 72 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.h b/drivers/md/md.h index 8d881cc59799..a49ab04ab707 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -222,6 +222,16 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, } return 0; } + +static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s, + int sectors) +{ + sector_t first_bad; + int bad_sectors; + + return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors); +} + extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, int is_new); extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 286f8b16c7bd..a145fe48b9ce 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -498,9 +498,6 @@ static void raid1_end_write_request(struct bio *bio) * to user-side. So if something waits for IO, then it * will wait for the 'master' bio. */ - sector_t first_bad; - int bad_sectors; - r1_bio->bios[mirror] = NULL; to_put = bio; /* @@ -516,8 +513,8 @@ static void raid1_end_write_request(struct bio *bio) set_bit(R1BIO_Uptodate, &r1_bio->state); /* Maybe we can clear some bad blocks. 
*/ - if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors, - &first_bad, &bad_sectors) && !discard_error) { + if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) && + !discard_error) { r1_bio->bios[mirror] = IO_MADE_GOOD; set_bit(R1BIO_MadeGood, &r1_bio->state); } @@ -1944,8 +1941,6 @@ static void end_sync_write(struct bio *bio) struct r1bio *r1_bio = get_resync_r1bio(bio); struct mddev *mddev = r1_bio->mddev; struct r1conf *conf = mddev->private; - sector_t first_bad; - int bad_sectors; struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev; if (!uptodate) { @@ -1955,14 +1950,11 @@ static void end_sync_write(struct bio *bio) set_bit(MD_RECOVERY_NEEDED, & mddev->recovery); set_bit(R1BIO_WriteError, &r1_bio->state); - } else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors, - &first_bad, &bad_sectors) && - !is_badblock(conf->mirrors[r1_bio->read_disk].rdev, - r1_bio->sector, - r1_bio->sectors, - &first_bad, &bad_sectors) - ) + } else if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) && + !rdev_has_badblock(conf->mirrors[r1_bio->read_disk].rdev, + r1_bio->sector, r1_bio->sectors)) { set_bit(R1BIO_MadeGood, &r1_bio->state); + } put_sync_write_buf(r1_bio, uptodate); } @@ -2279,16 +2271,12 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) s = PAGE_SIZE >> 9; do { - sector_t first_bad; - int bad_sectors; - rdev = conf->mirrors[d].rdev; if (rdev && (test_bit(In_sync, &rdev->flags) || (!test_bit(Faulty, &rdev->flags) && rdev->recovery_offset >= sect + s)) && - is_badblock(rdev, sect, s, - &first_bad, &bad_sectors) == 0) { + rdev_has_badblock(rdev, sect, s) == 0) { atomic_inc(&rdev->nr_pending); if (sync_page_io(rdev, sect, s<<9, conf->tmppage, REQ_OP_READ, false)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7412066ea22c..d5a7a621f0f0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -518,11 +518,7 @@ static void raid10_end_write_request(struct bio *bio) * The 'master' represents 
the composite IO operation to * user-side. So if something waits for IO, then it will * wait for the 'master' bio. - */ - sector_t first_bad; - int bad_sectors; - - /* + * * Do not set R10BIO_Uptodate if the current device is * rebuilding or Faulty. This is because we cannot use * such device for properly reading the data back (we could @@ -535,10 +531,9 @@ static void raid10_end_write_request(struct bio *bio) set_bit(R10BIO_Uptodate, &r10_bio->state); /* Maybe we can clear some bad blocks. */ - if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors) && !discard_error) { + if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, + r10_bio->sectors) && + !discard_error) { bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; @@ -1330,10 +1325,7 @@ retry_wait: } if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; sector_t dev_sector = r10_bio->devs[i].addr; - int bad_sectors; - int is_bad; /* * Discard request doesn't care the write result @@ -1342,9 +1334,8 @@ retry_wait: if (!r10_bio->sectors) continue; - is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors, - &first_bad, &bad_sectors); - if (is_bad < 0) { + if (rdev_has_badblock(rdev, dev_sector, + r10_bio->sectors) < 0) { /* * Mustn't write here until the bad block * is acknowledged @@ -2290,8 +2281,6 @@ static void end_sync_write(struct bio *bio) struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; int d; - sector_t first_bad; - int bad_sectors; int slot; int repl; struct md_rdev *rdev = NULL; @@ -2312,11 +2301,10 @@ static void end_sync_write(struct bio *bio) &rdev->mddev->recovery); set_bit(R10BIO_WriteError, &r10_bio->state); } - } else if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors)) + } else if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, + r10_bio->sectors)) { set_bit(R10BIO_MadeGood, &r10_bio->state); + } rdev_dec_pending(rdev, mddev); @@ -2597,11 
+2585,8 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, int sectors, struct page *page, enum req_op op) { - sector_t first_bad; - int bad_sectors; - - if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) - && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) + if (rdev_has_badblock(rdev, sector, sectors) && + (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; if (sync_page_io(rdev, sector, sectors << 9, page, op, false)) /* success */ @@ -2658,16 +2643,14 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 s = PAGE_SIZE >> 9; do { - sector_t first_bad; - int bad_sectors; - d = r10_bio->devs[sl].devnum; rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags) && - is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, - &first_bad, &bad_sectors) == 0) { + rdev_has_badblock(rdev, + r10_bio->devs[sl].addr + sect, + s) == 0) { atomic_inc(&rdev->nr_pending); success = sync_page_io(rdev, r10_bio->devs[sl].addr + diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ec445f49f1c..48129de21aec 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1210,10 +1210,8 @@ again: */ while (op_is_write(op) && rdev && test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; - int bad_sectors; - int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), - &first_bad, &bad_sectors); + int bad = rdev_has_badblock(rdev, sh->sector, + RAID5_STRIPE_SECTORS(conf)); if (!bad) break; @@ -2855,8 +2853,6 @@ static void raid5_end_write_request(struct bio *bi) struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; struct md_rdev *rdev; - sector_t first_bad; - int bad_sectors; int replacement = 0; for (i = 0 ; i < disks; i++) { @@ -2888,9 +2884,8 @@ static void raid5_end_write_request(struct bio *bi) if (replacement) { if 
(bi->bi_status) md_error(conf->mddev, rdev); - else if (is_badblock(rdev, sh->sector, - RAID5_STRIPE_SECTORS(conf), - &first_bad, &bad_sectors)) + else if (rdev_has_badblock(rdev, sh->sector, + RAID5_STRIPE_SECTORS(conf))) set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); } else { if (bi->bi_status) { @@ -2900,9 +2895,8 @@ static void raid5_end_write_request(struct bio *bi) if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - } else if (is_badblock(rdev, sh->sector, - RAID5_STRIPE_SECTORS(conf), - &first_bad, &bad_sectors)) { + } else if (rdev_has_badblock(rdev, sh->sector, + RAID5_STRIPE_SECTORS(conf))) { set_bit(R5_MadeGood, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) /* That was a successful write so make @@ -4674,8 +4668,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) /* Now to look around and see what can be done */ for (i=disks; i--; ) { struct md_rdev *rdev; - sector_t first_bad; - int bad_sectors; int is_bad = 0; dev = &sh->dev[i]; @@ -4719,8 +4711,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) rdev = conf->disks[i].replacement; if (rdev && !test_bit(Faulty, &rdev->flags) && rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && - !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), - &first_bad, &bad_sectors)) + !rdev_has_badblock(rdev, sh->sector, + RAID5_STRIPE_SECTORS(conf))) set_bit(R5_ReadRepl, &dev->flags); else { if (rdev && !test_bit(Faulty, &rdev->flags)) @@ -4733,8 +4725,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (rdev && test_bit(Faulty, &rdev->flags)) rdev = NULL; if (rdev) { - is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), - &first_bad, &bad_sectors); + is_bad = rdev_has_badblock(rdev, sh->sector, + RAID5_STRIPE_SECTORS(conf)); if (s->blocked_rdev == NULL && (test_bit(Blocked, &rdev->flags) || is_bad < 0)) { @@ 
-5463,8 +5455,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) struct r5conf *conf = mddev->private; struct bio *align_bio; struct md_rdev *rdev; - sector_t sector, end_sector, first_bad; - int bad_sectors, dd_idx; + sector_t sector, end_sector; + int dd_idx; bool did_inc; if (!in_chunk_boundary(mddev, raid_bio)) { @@ -5493,8 +5485,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) atomic_inc(&rdev->nr_pending); - if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad, - &bad_sectors)) { + if (rdev_has_badblock(rdev, sector, bio_sectors(raid_bio))) { rdev_dec_pending(rdev, mddev); return 0; } -- cgit v1.2.3 From 969d6589abcb369d53d84ec7c9c37f4b23ec1ad9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:05 +0800 Subject: md/raid1: factor out helpers to add rdev to conf There are no functional changes, just make code cleaner and prepare to record disk non-rotational information while adding and removing rdev to conf Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-3-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 85 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a145fe48b9ce..6ec9998f6257 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1757,6 +1757,44 @@ static int raid1_spare_active(struct mddev *mddev) return count; } +static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk, + bool replacement) +{ + struct raid1_info *info = conf->mirrors + disk; + + if (replacement) + info += conf->raid_disks; + + if (info->rdev) + return false; + + rdev->raid_disk = disk; + info->head_position = 0; + info->seq_start = MaxSector; + WRITE_ONCE(info->rdev, rdev); + + return true; +} + +static bool raid1_remove_conf(struct r1conf *conf, int disk) +{ + struct raid1_info *info 
= conf->mirrors + disk; + struct md_rdev *rdev = info->rdev; + + if (!rdev || test_bit(In_sync, &rdev->flags) || + atomic_read(&rdev->nr_pending)) + return false; + + /* Only remove non-faulty devices if recovery is not possible. */ + if (!test_bit(Faulty, &rdev->flags) && + rdev->mddev->recovery_disabled != conf->recovery_disabled && + rdev->mddev->degraded < conf->raid_disks) + return false; + + WRITE_ONCE(info->rdev, NULL); + return true; +} + static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) { struct r1conf *conf = mddev->private; @@ -1792,15 +1830,13 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - p->head_position = 0; - rdev->raid_disk = mirror; + raid1_add_conf(conf, rdev, mirror, false); err = 0; /* As all devices are equivalent, we don't need a full recovery * if this was recently any drive of the array */ if (rdev->saved_raid_disk < 0) conf->fullsync = 1; - WRITE_ONCE(p->rdev, rdev); break; } if (test_bit(WantReplacement, &p->rdev->flags) && @@ -1810,13 +1846,11 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (err && repl_slot >= 0) { /* Add this device as a replacement */ - p = conf->mirrors + repl_slot; clear_bit(In_sync, &rdev->flags); set_bit(Replacement, &rdev->flags); - rdev->raid_disk = repl_slot; + raid1_add_conf(conf, rdev, repl_slot, true); err = 0; conf->fullsync = 1; - WRITE_ONCE(p[conf->raid_disks].rdev, rdev); } print_conf(conf); @@ -1833,27 +1867,20 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) if (unlikely(number >= conf->raid_disks)) goto abort; - if (rdev != p->rdev) - p = conf->mirrors + conf->raid_disks + number; + if (rdev != p->rdev) { + number += conf->raid_disks; + p = conf->mirrors + number; + } print_conf(conf); if (rdev == p->rdev) { - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - err = -EBUSY; - goto abort; - } - /* Only remove 
non-faulty devices if recovery - * is not possible. - */ - if (!test_bit(Faulty, &rdev->flags) && - mddev->recovery_disabled != conf->recovery_disabled && - mddev->degraded < conf->raid_disks) { + if (!raid1_remove_conf(conf, number)) { err = -EBUSY; goto abort; } - WRITE_ONCE(p->rdev, NULL); - if (conf->mirrors[conf->raid_disks + number].rdev) { + + if (number < conf->raid_disks && + conf->mirrors[conf->raid_disks + number].rdev) { /* We just removed a device that is being replaced. * Move down the replacement. We drain all IO before * doing this to avoid confusion. @@ -2994,23 +3021,17 @@ static struct r1conf *setup_conf(struct mddev *mddev) err = -EINVAL; spin_lock_init(&conf->device_lock); + conf->raid_disks = mddev->raid_disks; rdev_for_each(rdev, mddev) { int disk_idx = rdev->raid_disk; - if (disk_idx >= mddev->raid_disks - || disk_idx < 0) + + if (disk_idx >= conf->raid_disks || disk_idx < 0) continue; - if (test_bit(Replacement, &rdev->flags)) - disk = conf->mirrors + mddev->raid_disks + disk_idx; - else - disk = conf->mirrors + disk_idx; - if (disk->rdev) + if (!raid1_add_conf(conf, rdev, disk_idx, + test_bit(Replacement, &rdev->flags))) goto abort; - disk->rdev = rdev; - disk->head_position = 0; - disk->seq_start = MaxSector; } - conf->raid_disks = mddev->raid_disks; conf->mddev = mddev; INIT_LIST_HEAD(&conf->retry_list); INIT_LIST_HEAD(&conf->bio_end_io_list); -- cgit v1.2.3 From 2c27d09d3a76b33629d2e681bf8b774f776ade7f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:06 +0800 Subject: md/raid1: record nonrot rdevs while adding/removing rdevs to conf For raid1, each read will iterate all the rdevs from conf and check if any rdev is non-rotational, then choose rdev with minimal IO inflight if so, or rdev with closest distance otherwise. Disk nonrot info can be changed through sysfs entry: /sys/block/[disk_name]/queue/rotational However, consider that this should only be used for testing, and user really shouldn't do this in real life. 
Record the number of non-rotational disks in conf, to avoid checking each rdev in IO fast path and simplify read_balance() a little bit. Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-4-yukuai1@huaweicloud.com --- drivers/md/md.h | 1 + drivers/md/raid1.c | 17 ++++++++++------- drivers/md/raid1.h | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.h b/drivers/md/md.h index a49ab04ab707..b2076a165c10 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -207,6 +207,7 @@ enum flag_bits { * check if there is collision between raid1 * serial bios. */ + Nonrot, /* non-rotational device (SSD) */ }; static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6ec9998f6257..de6ea87d4d24 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -599,7 +599,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect int sectors; int best_good_sectors; int best_disk, best_dist_disk, best_pending_disk; - int has_nonrot_disk; int disk; sector_t best_dist; unsigned int min_pending; @@ -620,7 +619,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_pending_disk = -1; min_pending = UINT_MAX; best_good_sectors = 0; - has_nonrot_disk = 0; choose_next_idle = 0; clear_bit(R1BIO_FailFast, &r1_bio->state); @@ -637,7 +635,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect sector_t first_bad; int bad_sectors; unsigned int pending; - bool nonrot; rdev = conf->mirrors[disk].rdev; if (r1_bio->bios[disk] == IO_BLOCKED @@ -703,8 +700,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect /* At least two disks to choose from so failfast is OK */ set_bit(R1BIO_FailFast, &r1_bio->state); - nonrot = bdev_nonrot(rdev->bdev); - 
has_nonrot_disk |= nonrot; pending = atomic_read(&rdev->nr_pending); dist = abs(this_sector - conf->mirrors[disk].head_position); if (choose_first) { @@ -731,7 +726,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * small, but not a big deal since when the second disk * starts IO, the first disk is likely still busy. */ - if (nonrot && opt_iosize > 0 && + if (test_bit(Nonrot, &rdev->flags) && opt_iosize > 0 && mirror->seq_start != MaxSector && mirror->next_seq_sect > opt_iosize && mirror->next_seq_sect - opt_iosize >= @@ -763,7 +758,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * mixed ratation/non-rotational disks depending on workload. */ if (best_disk == -1) { - if (has_nonrot_disk || min_pending == 0) + if (READ_ONCE(conf->nonrot_disks) || min_pending == 0) best_disk = best_pending_disk; else best_disk = best_dist_disk; @@ -1768,6 +1763,11 @@ static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk, if (info->rdev) return false; + if (bdev_nonrot(rdev->bdev)) { + set_bit(Nonrot, &rdev->flags); + WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1); + } + rdev->raid_disk = disk; info->head_position = 0; info->seq_start = MaxSector; @@ -1791,6 +1791,9 @@ static bool raid1_remove_conf(struct r1conf *conf, int disk) rdev->mddev->degraded < conf->raid_disks) return false; + if (test_and_clear_bit(Nonrot, &rdev->flags)) + WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks - 1); + WRITE_ONCE(info->rdev, NULL); return true; } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 14d4211a123a..5300cbaa58a4 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -71,6 +71,7 @@ struct r1conf { * allow for replacements. 
*/ int raid_disks; + int nonrot_disks; spinlock_t device_lock; -- cgit v1.2.3 From 257ac239ffcfd097a9a0732bf5095fb00164f334 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:07 +0800 Subject: md/raid1: fix choose next idle in read_balance() Commit 12cee5a8a29e ("md/raid1: prevent merging too large request") added the case choose next idle in read_balance(): read_balance: for_each_rdev if(next_seq_sect == this_sector || dist == 0) -> sequential reads best_disk = disk; if (...) choose_next_idle = 1 continue; for_each_rdev -> iterate next rdev if (pending == 0) best_disk = disk; -> choose the next idle disk break; if (choose_next_idle) -> keep using this rdev if there are no other idle disks continue However, commit 2e52d449bcec ("md/raid1: add failfast handling for reads.") removed the code: - /* If device is idle, use it */ - if (pending == 0) { - best_disk = disk; - break; - } Hence choose next idle will never work now, fix this problem by following: 1) don't set best_disk in this case, read_balance() will choose the best disk after iterating all the disks; 2) add 'pending' so that other idle disk will be chosen; 3) add a new local variable 'sequential_disk' to record the disk, and if there is no other idle disk, 'sequential_disk' will be chosen; Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.") Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-5-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index de6ea87d4d24..fa86d9fdb16f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -598,13 +598,12 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect const sector_t this_sector = r1_bio->sector; int sectors; int
best_good_sectors; - int best_disk, best_dist_disk, best_pending_disk; + int best_disk, best_dist_disk, best_pending_disk, sequential_disk; int disk; sector_t best_dist; unsigned int min_pending; struct md_rdev *rdev; int choose_first; - int choose_next_idle; /* * Check if we can balance. We can balance on the whole @@ -615,11 +614,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect sectors = r1_bio->sectors; best_disk = -1; best_dist_disk = -1; + sequential_disk = -1; best_dist = MaxSector; best_pending_disk = -1; min_pending = UINT_MAX; best_good_sectors = 0; - choose_next_idle = 0; clear_bit(R1BIO_FailFast, &r1_bio->state); if ((conf->mddev->recovery_cp < this_sector + sectors) || @@ -712,7 +711,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; struct raid1_info *mirror = &conf->mirrors[disk]; - best_disk = disk; /* * If buffered sequential IO size exceeds optimal * iosize, check if there is idle disk. If yes, choose @@ -731,15 +729,22 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect mirror->next_seq_sect > opt_iosize && mirror->next_seq_sect - opt_iosize >= mirror->seq_start) { - choose_next_idle = 1; - continue; + /* + * Add 'pending' to avoid choosing this disk if + * there is other idle disk. + */ + pending++; + /* + * If there is no other idle disk, this disk + * will be chosen. + */ + sequential_disk = disk; + } else { + best_disk = disk; + break; } - break; } - if (choose_next_idle) - continue; - if (min_pending > pending) { min_pending = pending; best_pending_disk = disk; @@ -751,6 +756,13 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect } } + /* + * sequential IO size exceeds optimal iosize, however, there is no other + * idle disk, so choose the sequential disk. 
+ */ + if (best_disk == -1 && min_pending != 0) + best_disk = sequential_disk; + /* * If all disks are rotational, choose the closest disk. If any disk is * non-rotational, choose the disk with less pending request even the -- cgit v1.2.3 From f29841ff3b272e1703454f93b96baf0fe0d9f31a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:08 +0800 Subject: md/raid1-10: add a helper raid1_check_read_range() The checking and handling of bad blocks appear many times during read_balance() in raid1 and raid10. This helper will be used in later patches to simplify read_balance() a lot. Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-6-yukuai1@huaweicloud.com --- drivers/md/raid1-10.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'drivers') diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 512746551f36..9bc0f0022a6c 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -227,3 +227,52 @@ static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev) return false; } + +/** + * raid1_check_read_range() - check a given read range for bad blocks, + * available read length is returned; + * @rdev: the rdev to read; + * @this_sector: read position; + * @len: read length; + * + * helper function for read_balance() + * + * 1) If there are no bad blocks in the range, @len is returned; + * 2) If the range are all bad blocks, 0 is returned; + * 3) If there are partial bad blocks: + * - If the bad block range starts after @this_sector, the length of first + * good region is returned; + * - If the bad block range starts before @this_sector, 0 is returned and + * the @len is updated to the offset into the region before we get to the + * good blocks; + */ +static inline int raid1_check_read_range(struct md_rdev *rdev, + sector_t this_sector, int *len) +{ +
sector_t first_bad; + int bad_sectors; + + /* no bad block overlap */ + if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors)) + return *len; + + /* + * bad block range starts offset into our range so we can return the + * number of sectors before the bad blocks start. + */ + if (first_bad > this_sector) + return first_bad - this_sector; + + /* read range is fully consumed by bad blocks. */ + if (this_sector + *len <= first_bad + bad_sectors) + return 0; + + /* + * final case, bad block range starts before or at the start of our + * range but does not cover our entire range so we still return 0 but + * update the length with the number of sectors before we get to the + * good ones. + */ + *len = first_bad + bad_sectors - this_sector; + return 0; +} -- cgit v1.2.3 From f109207629552cb04c2a48e90abe7c481e363984 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:09 +0800 Subject: md/raid1-10: factor out a new helper raid1_should_read_first() If resync is in progress, read_balance() should find the first usable disk, otherwise, data could be inconsistent after resync is done. raid1 and raid10 implement the same checking, hence factor out the checking to make code cleaner. Noted that raid1 is using 'mddev->recovery_cp', which is updated after all resync IO is done, while raid10 is using 'conf->next_resync', which is inaccurate because raid10 update it before submitting resync IO. Fortunately, raid10 read IO can't concurrent with resync IO, hence there is no problem. And this patch also switch raid10 to use 'mddev->recovery_cp'. 
Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-7-yukuai1@huaweicloud.com --- drivers/md/raid1-10.c | 20 ++++++++++++++++++++ drivers/md/raid1.c | 15 ++------------- drivers/md/raid10.c | 13 ++----------- 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 9bc0f0022a6c..2ea1710a3b70 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -276,3 +276,23 @@ static inline int raid1_check_read_range(struct md_rdev *rdev, *len = first_bad + bad_sectors - this_sector; return 0; } + +/* + * Check if read should choose the first rdev. + * + * Balance on the whole device if no resync is going on (recovery is ok) or + * below the resync window. Otherwise, take the first readable disk. + */ +static inline bool raid1_should_read_first(struct mddev *mddev, + sector_t this_sector, int len) +{ + if ((mddev->recovery_cp < this_sector + len)) + return true; + + if (mddev_is_clustered(mddev) && + md_cluster_ops->area_resyncing(mddev, READ, this_sector, + this_sector + len)) + return true; + + return false; +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fa86d9fdb16f..30f467bb48fd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -605,11 +605,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect struct md_rdev *rdev; int choose_first; - /* - * Check if we can balance. We can balance on the whole - * device if no resync is going on, or below the resync window. - * We take the first readable disk when above the resync window. 
- */ retry: sectors = r1_bio->sectors; best_disk = -1; @@ -619,16 +614,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_pending_disk = -1; min_pending = UINT_MAX; best_good_sectors = 0; + choose_first = raid1_should_read_first(conf->mddev, this_sector, + sectors); clear_bit(R1BIO_FailFast, &r1_bio->state); - if ((conf->mddev->recovery_cp < this_sector + sectors) || - (mddev_is_clustered(conf->mddev) && - md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, - this_sector + sectors))) - choose_first = 1; - else - choose_first = 0; - for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { sector_t dist; sector_t first_bad; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d5a7a621f0f0..8aecdb1ccc16 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -748,17 +748,8 @@ static struct md_rdev *read_balance(struct r10conf *conf, best_good_sectors = 0; do_balance = 1; clear_bit(R10BIO_FailFast, &r10_bio->state); - /* - * Check if we can balance. We can balance on the whole - * device if no resync is going on (recovery is ok), or below - * the resync window. We take the first readable disk when - * above the resync window. - */ - if ((conf->mddev->recovery_cp < MaxSector - && (this_sector + sectors >= conf->next_resync)) || - (mddev_is_clustered(conf->mddev) && - md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, - this_sector + sectors))) + + if (raid1_should_read_first(conf->mddev, this_sector, sectors)) do_balance = 0; for (slot = 0; slot < conf->copies ; slot++) { -- cgit v1.2.3 From 31a73331752d3c7d28c8fc089b21d3ae8c15e664 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:10 +0800 Subject: md/raid1: factor out read_first_rdev() from read_balance() read_balance() is hard to understand because there are too many status and branches, and it's overlong. This patch factor out the case to read the first rdev from read_balance(), there are no functional changes. 
Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-8-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 63 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 30f467bb48fd..3149f22f1155 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -579,6 +579,47 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector, return len; } +static void update_read_sectors(struct r1conf *conf, int disk, + sector_t this_sector, int len) +{ + struct raid1_info *info = &conf->mirrors[disk]; + + atomic_inc(&info->rdev->nr_pending); + if (info->next_seq_sect != this_sector) + info->seq_start = this_sector; + info->next_seq_sect = this_sector + len; +} + +static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, + int *max_sectors) +{ + sector_t this_sector = r1_bio->sector; + int len = r1_bio->sectors; + int disk; + + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { + struct md_rdev *rdev; + int read_len; + + if (r1_bio->bios[disk] == IO_BLOCKED) + continue; + + rdev = conf->mirrors[disk].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags)) + continue; + + /* choose the first disk even if it has some bad blocks. */ + read_len = raid1_check_read_range(rdev, this_sector, &len); + if (read_len > 0) { + update_read_sectors(conf, disk, this_sector, read_len); + *max_sectors = read_len; + return disk; + } + } + + return -1; +} + /* * This routine returns the disk from which the requested read should * be done. 
There is a per-array 'next expected sequential IO' sector @@ -603,7 +644,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect sector_t best_dist; unsigned int min_pending; struct md_rdev *rdev; - int choose_first; retry: sectors = r1_bio->sectors; @@ -614,10 +654,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_pending_disk = -1; min_pending = UINT_MAX; best_good_sectors = 0; - choose_first = raid1_should_read_first(conf->mddev, this_sector, - sectors); clear_bit(R1BIO_FailFast, &r1_bio->state); + if (raid1_should_read_first(conf->mddev, this_sector, sectors)) + return choose_first_rdev(conf, r1_bio, max_sectors); + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { sector_t dist; sector_t first_bad; @@ -663,8 +704,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * bad_sectors from another device.. */ bad_sectors -= (this_sector - first_bad); - if (choose_first && sectors > bad_sectors) - sectors = bad_sectors; if (best_good_sectors > sectors) best_good_sectors = sectors; @@ -674,8 +713,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_good_sectors = good_sectors; best_disk = disk; } - if (choose_first) - break; } continue; } else { @@ -690,10 +727,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect pending = atomic_read(&rdev->nr_pending); dist = abs(this_sector - conf->mirrors[disk].head_position); - if (choose_first) { - best_disk = disk; - break; - } /* Don't change to another disk for sequential reads */ if (conf->mirrors[disk].next_seq_sect == this_sector || dist == 0) { @@ -769,13 +802,9 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect rdev = conf->mirrors[best_disk].rdev; if (!rdev) goto retry; - atomic_inc(&rdev->nr_pending); - sectors = best_good_sectors; - - if (conf->mirrors[best_disk].next_seq_sect != this_sector) - 
conf->mirrors[best_disk].seq_start = this_sector; - conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; + sectors = best_good_sectors; + update_read_sectors(conf, disk, this_sector, sectors); } *max_sectors = sectors; -- cgit v1.2.3 From dfa8ecd167c1753d4fc24a517e1d79c603183c94 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:11 +0800 Subject: md/raid1: factor out choose_slow_rdev() from read_balance() read_balance() is hard to understand because there are too many states and branches, and it's overlong. This patch factors out the case to read the slow rdev from read_balance(); there are no functional changes. Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-9-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 69 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3149f22f1155..09b7e93a54b5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -620,6 +620,53 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, + int *max_sectors) +{ + sector_t this_sector = r1_bio->sector; + int bb_disk = -1; + int bb_read_len = 0; + int disk; + + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { + struct md_rdev *rdev; + int len; + int read_len; + + if (r1_bio->bios[disk] == IO_BLOCKED) + continue; + + rdev = conf->mirrors[disk].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags) || + !test_bit(WriteMostly, &rdev->flags)) + continue; + + /* there are no bad blocks, we can use this disk */ + len = r1_bio->sectors; + read_len = raid1_check_read_range(rdev, this_sector, &len); + if (read_len == r1_bio->sectors) { + update_read_sectors(conf, disk, this_sector, read_len); + return disk; 
+ } + + /* + * there are partial bad blocks, choose the rdev with largest + * read length. + */ + if (read_len > bb_read_len) { + bb_disk = disk; + bb_read_len = read_len; + } + } + + if (bb_disk != -1) { + *max_sectors = bb_read_len; + update_read_sectors(conf, bb_disk, this_sector, bb_read_len); + } + + return bb_disk; +} + /* * This routine returns the disk from which the requested read should * be done. There is a per-array 'next expected sequential IO' sector @@ -673,23 +720,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect if (!test_bit(In_sync, &rdev->flags) && rdev->recovery_offset < this_sector + sectors) continue; - if (test_bit(WriteMostly, &rdev->flags)) { - /* Don't balance among write-mostly, just - * use the first as a last resort */ - if (best_dist_disk < 0) { - if (is_badblock(rdev, this_sector, sectors, - &first_bad, &bad_sectors)) { - if (first_bad <= this_sector) - /* Cannot use this */ - continue; - best_good_sectors = first_bad - this_sector; - } else - best_good_sectors = sectors; - best_dist_disk = disk; - best_pending_disk = disk; - } + if (test_bit(WriteMostly, &rdev->flags)) continue; - } /* This is a reasonable device to use. It might * even be best. */ @@ -808,7 +840,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect } *max_sectors = sectors; - return best_disk; + if (best_disk >= 0) + return best_disk; + + return choose_slow_rdev(conf, r1_bio, max_sectors); } static void wake_up_barrier(struct r1conf *conf) -- cgit v1.2.3 From 9f3ced792203891b8fa39afa37908eba843fcfac Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:12 +0800 Subject: md/raid1: factor out choose_bb_rdev() from read_balance() read_balance() is hard to understand because there are too many states and branches, and it's overlong. This patch factors out the case to read the rdev with bad blocks from read_balance(); there are no functional changes. 
Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-10-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 79 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 09b7e93a54b5..f6e75c123e5a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -620,6 +620,44 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, + int *max_sectors) +{ + sector_t this_sector = r1_bio->sector; + int best_disk = -1; + int best_len = 0; + int disk; + + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { + struct md_rdev *rdev; + int len; + int read_len; + + if (r1_bio->bios[disk] == IO_BLOCKED) + continue; + + rdev = conf->mirrors[disk].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags) || + test_bit(WriteMostly, &rdev->flags)) + continue; + + /* keep track of the disk with the most readable sectors. 
*/ + len = r1_bio->sectors; + read_len = raid1_check_read_range(rdev, this_sector, &len); + if (read_len > best_len) { + best_disk = disk; + best_len = read_len; + } + } + + if (best_disk != -1) { + *max_sectors = best_len; + update_read_sectors(conf, best_disk, this_sector, best_len); + } + + return best_disk; +} + static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -708,8 +746,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { sector_t dist; - sector_t first_bad; - int bad_sectors; unsigned int pending; rdev = conf->mirrors[disk].rdev; @@ -722,36 +758,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect continue; if (test_bit(WriteMostly, &rdev->flags)) continue; - /* This is a reasonable device to use. It might - * even be best. - */ - if (is_badblock(rdev, this_sector, sectors, - &first_bad, &bad_sectors)) { - if (best_dist < MaxSector) - /* already have a better device */ - continue; - if (first_bad <= this_sector) { - /* cannot read here. If this is the 'primary' - * device, then we must not read beyond - * bad_sectors from another device.. 
- */ - bad_sectors -= (this_sector - first_bad); - if (best_good_sectors > sectors) - best_good_sectors = sectors; - - } else { - sector_t good_sectors = first_bad - this_sector; - if (good_sectors > best_good_sectors) { - best_good_sectors = good_sectors; - best_disk = disk; - } - } + if (rdev_has_badblock(rdev, this_sector, sectors)) continue; - } else { - if ((sectors > best_good_sectors) && (best_disk >= 0)) - best_disk = -1; - best_good_sectors = sectors; - } if (best_disk >= 0) /* At least two disks to choose from so failfast is OK */ @@ -843,6 +851,15 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect if (best_disk >= 0) return best_disk; + /* + * If we are here it means we didn't find a perfectly good disk so + * now spend a bit more time trying to find one with the most good + * sectors. + */ + disk = choose_bb_rdev(conf, r1_bio, max_sectors); + if (disk >= 0) + return disk; + return choose_slow_rdev(conf, r1_bio, max_sectors); } -- cgit v1.2.3 From ba58f57fdf98af642c57654599823640ffe8334c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:13 +0800 Subject: md/raid1: factor out the code to manage sequential IO There is no functional change for now, make read_balance() cleaner and prepare to fix problems and refactor the handler of sequential IO. 
Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-11-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 71 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f6e75c123e5a..17c2201d5d2b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -705,6 +705,31 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, return bb_disk; } +static bool is_sequential(struct r1conf *conf, int disk, struct r1bio *r1_bio) +{ + /* TODO: address issues with this check and concurrency. */ + return conf->mirrors[disk].next_seq_sect == r1_bio->sector || + conf->mirrors[disk].head_position == r1_bio->sector; +} + +/* + * If buffered sequential IO size exceeds optimal iosize, check if there is idle + * disk. If yes, choose the idle disk. + */ +static bool should_choose_next(struct r1conf *conf, int disk) +{ + struct raid1_info *mirror = &conf->mirrors[disk]; + int opt_iosize; + + if (!test_bit(Nonrot, &mirror->rdev->flags)) + return false; + + opt_iosize = bdev_io_opt(mirror->rdev->bdev) >> 9; + return opt_iosize > 0 && mirror->seq_start != MaxSector && + mirror->next_seq_sect > opt_iosize && + mirror->next_seq_sect - opt_iosize >= mirror->seq_start; +} + /* * This routine returns the disk from which the requested read should * be done. 
There is a per-array 'next expected sequential IO' sector @@ -768,43 +793,21 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect pending = atomic_read(&rdev->nr_pending); dist = abs(this_sector - conf->mirrors[disk].head_position); /* Don't change to another disk for sequential reads */ - if (conf->mirrors[disk].next_seq_sect == this_sector - || dist == 0) { - int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; - struct raid1_info *mirror = &conf->mirrors[disk]; - - /* - * If buffered sequential IO size exceeds optimal - * iosize, check if there is idle disk. If yes, choose - * the idle disk. read_balance could already choose an - * idle disk before noticing it's a sequential IO in - * this disk. This doesn't matter because this disk - * will idle, next time it will be utilized after the - * first disk has IO size exceeds optimal iosize. In - * this way, iosize of the first disk will be optimal - * iosize at least. iosize of the second disk might be - * small, but not a big deal since when the second disk - * starts IO, the first disk is likely still busy. - */ - if (test_bit(Nonrot, &rdev->flags) && opt_iosize > 0 && - mirror->seq_start != MaxSector && - mirror->next_seq_sect > opt_iosize && - mirror->next_seq_sect - opt_iosize >= - mirror->seq_start) { - /* - * Add 'pending' to avoid choosing this disk if - * there is other idle disk. - */ - pending++; - /* - * If there is no other idle disk, this disk - * will be chosen. - */ - sequential_disk = disk; - } else { + if (is_sequential(conf, disk, r1_bio)) { + if (!should_choose_next(conf, disk)) { best_disk = disk; break; } + /* + * Add 'pending' to avoid choosing this disk if + * there is other idle disk. + */ + pending++; + /* + * If there is no other idle disk, this disk + * will be chosen. 
+ */ + sequential_disk = disk; } if (min_pending > pending) { -- cgit v1.2.3 From 0091c5a269eca7ab0e057c3083804daed9997f08 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 29 Feb 2024 17:57:14 +0800 Subject: md/raid1: factor out helpers to choose the best rdev from read_balance() The way that best rdev is chosen: 1) If the read is sequential from one rdev: - if rdev is rotational, use this rdev; - if rdev is non-rotational, use this rdev until total read length exceeds disk opt io size; 2) If the read is not sequential: - if there is an idle disk, use it, otherwise: - if the array has a non-rotational disk, choose the rdev with minimal inflight IO; - if all the underlying disks are rotational disks, choose the rdev with closest IO; There are no functional changes, just to make code cleaner and prepare for the following refactor. Co-developed-by: Paul Luse Signed-off-by: Paul Luse Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240229095714.926789-12-yukuai1@huaweicloud.com --- drivers/md/raid1.c | 175 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 98 insertions(+), 77 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 17c2201d5d2b..afca975ec7f3 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -730,74 +730,71 @@ static bool should_choose_next(struct r1conf *conf, int disk) mirror->next_seq_sect - opt_iosize >= mirror->seq_start; } -/* - * This routine returns the disk from which the requested read should - * be done. There is a per-array 'next expected sequential IO' sector - * number - if this matches on the next IO then we use the last disk. - * There is also a per-disk 'last know head position' sector that is - * maintained from IRQ contexts, both the normal and the resync IO - * completion handlers update this position correctly. If there is no - * perfect sequential match then we pick the disk whose head is closest. 
- * - * If there are 2 mirrors in the same 2 devices, performance degrades - * because position is mirror, not device based. - * - * The rdev for the device selected will have nr_pending incremented. - */ -static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) +static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) { - const sector_t this_sector = r1_bio->sector; - int sectors; - int best_good_sectors; - int best_disk, best_dist_disk, best_pending_disk, sequential_disk; - int disk; - sector_t best_dist; - unsigned int min_pending; - struct md_rdev *rdev; + if (!rdev || test_bit(Faulty, &rdev->flags)) + return false; - retry: - sectors = r1_bio->sectors; - best_disk = -1; - best_dist_disk = -1; - sequential_disk = -1; - best_dist = MaxSector; - best_pending_disk = -1; - min_pending = UINT_MAX; - best_good_sectors = 0; - clear_bit(R1BIO_FailFast, &r1_bio->state); + /* still in recovery */ + if (!test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + return false; - if (raid1_should_read_first(conf->mddev, this_sector, sectors)) - return choose_first_rdev(conf, r1_bio, max_sectors); + /* don't read from slow disk unless have to */ + if (test_bit(WriteMostly, &rdev->flags)) + return false; + + /* don't split IO for bad blocks unless have to */ + if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors)) + return false; + + return true; +} + +struct read_balance_ctl { + sector_t closest_dist; + int closest_dist_disk; + int min_pending; + int min_pending_disk; + int sequential_disk; + int readable_disks; +}; + +static int choose_best_rdev(struct r1conf *conf, struct r1bio *r1_bio) +{ + int disk; + struct read_balance_ctl ctl = { + .closest_dist_disk = -1, + .closest_dist = MaxSector, + .min_pending_disk = -1, + .min_pending = UINT_MAX, + .sequential_disk = -1, + }; for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { + struct md_rdev *rdev; sector_t dist; unsigned int pending; - 
rdev = conf->mirrors[disk].rdev; - if (r1_bio->bios[disk] == IO_BLOCKED - || rdev == NULL - || test_bit(Faulty, &rdev->flags)) - continue; - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < this_sector + sectors) - continue; - if (test_bit(WriteMostly, &rdev->flags)) + if (r1_bio->bios[disk] == IO_BLOCKED) continue; - if (rdev_has_badblock(rdev, this_sector, sectors)) + + rdev = conf->mirrors[disk].rdev; + if (!rdev_readable(rdev, r1_bio)) continue; - if (best_disk >= 0) - /* At least two disks to choose from so failfast is OK */ + /* At least two disks to choose from so failfast is OK */ + if (ctl.readable_disks++ == 1) set_bit(R1BIO_FailFast, &r1_bio->state); pending = atomic_read(&rdev->nr_pending); - dist = abs(this_sector - conf->mirrors[disk].head_position); + dist = abs(r1_bio->sector - conf->mirrors[disk].head_position); + /* Don't change to another disk for sequential reads */ if (is_sequential(conf, disk, r1_bio)) { - if (!should_choose_next(conf, disk)) { - best_disk = disk; - break; - } + if (!should_choose_next(conf, disk)) + return disk; + /* * Add 'pending' to avoid choosing this disk if * there is other idle disk. @@ -807,17 +804,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * If there is no other idle disk, this disk * will be chosen. */ - sequential_disk = disk; + ctl.sequential_disk = disk; } - if (min_pending > pending) { - min_pending = pending; - best_pending_disk = disk; + if (ctl.min_pending > pending) { + ctl.min_pending = pending; + ctl.min_pending_disk = disk; } - if (dist < best_dist) { - best_dist = dist; - best_dist_disk = disk; + if (ctl.closest_dist > dist) { + ctl.closest_dist = dist; + ctl.closest_dist_disk = disk; } } @@ -825,8 +822,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * sequential IO size exceeds optimal iosize, however, there is no other * idle disk, so choose the sequential disk. 
*/ - if (best_disk == -1 && min_pending != 0) - best_disk = sequential_disk; + if (ctl.sequential_disk != -1 && ctl.min_pending != 0) + return ctl.sequential_disk; /* * If all disks are rotational, choose the closest disk. If any disk is @@ -834,25 +831,49 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * disk is rotational, which might/might not be optimal for raids with * mixed ratation/non-rotational disks depending on workload. */ - if (best_disk == -1) { - if (READ_ONCE(conf->nonrot_disks) || min_pending == 0) - best_disk = best_pending_disk; - else - best_disk = best_dist_disk; - } + if (ctl.min_pending_disk != -1 && + (READ_ONCE(conf->nonrot_disks) || ctl.min_pending == 0)) + return ctl.min_pending_disk; + else + return ctl.closest_dist_disk; +} - if (best_disk >= 0) { - rdev = conf->mirrors[best_disk].rdev; - if (!rdev) - goto retry; +/* + * This routine returns the disk from which the requested read should be done. + * + * 1) If resync is in progress, find the first usable disk and use it even if it + * has some bad blocks. + * + * 2) Now that there is no resync, loop through all disks and skipping slow + * disks and disks with bad blocks for now. Only pay attention to key disk + * choice. + * + * 3) If we've made it this far, now look for disks with bad blocks and choose + * the one with most number of sectors. + * + * 4) If we are all the way at the end, we have no choice but to use a disk even + * if it is write mostly. + * + * The rdev for the device selected will have nr_pending incremented. 
+ */ +static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, + int *max_sectors) +{ + int disk; - sectors = best_good_sectors; - update_read_sectors(conf, disk, this_sector, sectors); - } - *max_sectors = sectors; + clear_bit(R1BIO_FailFast, &r1_bio->state); + + if (raid1_should_read_first(conf->mddev, r1_bio->sector, + r1_bio->sectors)) + return choose_first_rdev(conf, r1_bio, max_sectors); - if (best_disk >= 0) - return best_disk; + disk = choose_best_rdev(conf, r1_bio); + if (disk >= 0) { + *max_sectors = r1_bio->sectors; + update_read_sectors(conf, disk, r1_bio->sector, + r1_bio->sectors); + return disk; + } /* * If we are here it means we didn't find a perfectly good disk so -- cgit v1.2.3 From 530b1dbd97846b110ea8a94c7cc903eca21786e5 Mon Sep 17 00:00:00 2001 From: Arturas Moskvinas Date: Fri, 1 Mar 2024 09:12:04 +0200 Subject: gpio: 74x164: Enable output pins after registers are reset Chip outputs are enabled[1] before actual reset is performed[2] which might cause pin output value to flip flop if previous pin value was set to 1. Fix that behavior by making sure chip is fully reset before all outputs are enabled. Flip-flop can be noticed when module is removed and inserted again and one of the pins was changed to 1 before removal. 100 microsecond flipping is noticeable on oscilloscope (100khz SPI bus). For a properly reset chip - output is enabled around 100 microseconds (on 100khz SPI bus) later during probing process hence should be irrelevant behavioral change. 
Fixes: 7ebc194d0fd4 (gpio: 74x164: Introduce 'enable-gpios' property) Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L130 [1] Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L150 [2] Signed-off-by: Arturas Moskvinas Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-74x164.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c33310517..753e7be039e4 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; -- cgit v1.2.3 From e4aec4daa8c009057b5e063db1b7322252c92dc8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Feb 2024 21:28:46 +0200 Subject: gpiolib: Fix the error path order in gpiochip_add_data_with_key() After shuffling the code, error path wasn't updated correctly. Fix it here. 
Fixes: 2f4133bb5f14 ("gpiolib: No need to call gpiochip_remove_pin_ranges() twice") Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e434e8cc1229..58a839c1ae48 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -999,11 +999,11 @@ err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_remove_acpi_chip: acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); -- cgit v1.2.3 From adf47524b56a791734ae24da8412c6579e2fab4f Mon Sep 17 00:00:00 2001 From: Peter Martincic Date: Mon, 27 Nov 2023 13:35:24 -0800 Subject: hv_utils: Allow implicit ICTIMESYNCFLAG_SYNC Hyper-V hosts can omit the _SYNC flag due to a bug on resume from modern suspend. In such a case, the guest may fail to update its time-of-day to account for the period when it was suspended, and could proceed with a significantly wrong time-of-day. In such a case when the guest is significantly behind, fix it by treating a _SAMPLE the same as if _SYNC was received so that the guest time-of-day is updated. This is hidden behind param hv_utils.timesync_implicit. 
Signed-off-by: Peter Martincic Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20231127213524.52783-1-pmartincic@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20231127213524.52783-1-pmartincic@linux.microsoft.com> --- drivers/hv/hv_util.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 42aec2c5606a..9c97c4065fe7 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -296,6 +296,11 @@ static struct { spinlock_t lock; } host_ts; +static bool timesync_implicit; + +module_param(timesync_implicit, bool, 0644); +MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind"); + static inline u64 reftime_to_ns(u64 reftime) { return (reftime - WLTIMEDELTA) * 100; @@ -344,6 +349,29 @@ static void hv_set_host_time(struct work_struct *work) do_settimeofday64(&ts); } +/* + * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume. + * Force a sync if the guest is behind. + */ +static inline bool hv_implicit_sync(u64 host_time) +{ + struct timespec64 new_ts; + struct timespec64 threshold_ts; + + new_ts = ns_to_timespec64(reftime_to_ns(host_time)); + ktime_get_real_ts64(&threshold_ts); + + threshold_ts.tv_sec += 5; + + /* + * If guest behind the host by 5 or more seconds. + */ + if (timespec64_compare(&new_ts, &threshold_ts) >= 0) + return true; + + return false; +} + /* * Synchronize time with host after reboot, restore, etc. 
* @@ -384,7 +412,8 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags) spin_unlock_irqrestore(&host_ts.lock, flags); /* Schedule work to do do_settimeofday64() */ - if (adj_flags & ICTIMESYNCFLAG_SYNC) + if ((adj_flags & ICTIMESYNCFLAG_SYNC) || + (timesync_implicit && hv_implicit_sync(host_ts.host_time))) schedule_work(&adj_time_work); } -- cgit v1.2.3 From 20ee2ae8c58990ca9e98954b7ac2b66c53a0310e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 31 Jan 2024 22:00:22 -0800 Subject: fbdev/hyperv_fb: Fix logic error for Gen2 VMs in hvfb_getmem() A recent commit removing the use of screen_info introduced a logic error. The error causes hvfb_getmem() to always return -ENOMEM for Generation 2 VMs. As a result, the Hyper-V frame buffer device fails to initialize. The error was introduced by removing an "else if" clause, leaving Gen2 VMs to always take the -ENOMEM error path. Fix the problem by removing the error path "else" clause. Gen 2 VMs now always proceed through the MMIO memory allocation code, but with "base" and "size" defaulting to 0. Fixes: 0aa0838c84da ("fbdev/hyperv_fb: Remove firmware framebuffers with aperture helpers") Signed-off-by: Michael Kelley Reviewed-by: Thomas Zimmermann Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240201060022.233666-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240201060022.233666-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index c26ee6fd73c9..8fdccf033b2d 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1010,8 +1010,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. 
Using MMIO instead.\n"); - } else { - goto err1; } /* -- cgit v1.2.3 From 8db0edc4acb1c654e4c115a3978fb2681c5bfb74 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 11 Jan 2024 08:54:50 -0800 Subject: Drivers: hv: vmbus: Remove duplication and cleanup code in create_gpadl_header() create_gpadl_header() creates a message header, and one or more message bodies if the number of GPADL entries exceeds what fits in the header. Currently the code for creating the message header is duplicated in the two halves of the main "if" statement governing whether message bodies are created. Eliminate the duplication by making minor tweaks to the logic and associated comments. While here, simplify the handling of memory allocation errors, and use umin() instead of open coding it. For ease of review, the indentation of sizable chunks of code is *not* changed. A follow-on patch updates only the indentation. No functional change. Suggested-by: Dan Carpenter Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240111165451.269418-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240111165451.269418-1-mhklinux@outlook.com> --- drivers/hv/channel.c | 54 ++++++++++------------------------------------------ 1 file changed, 10 insertions(+), 44 deletions(-) (limited to 'drivers') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56f7e06c673e..604f5aff8502 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -322,21 +322,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT; - /* do we need a gpadl body msg */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_header) - sizeof(struct gpa_range); - pfncount = pfnsize / sizeof(u64); + pfncount = umin(pagecount, pfnsize / sizeof(u64)); - if (pagecount > pfncount) { - /* we need a gpadl body */ - /* fill in the header */ msgsize = sizeof(struct vmbus_channel_msginfo) + sizeof(struct 
vmbus_channel_gpadl_header) + sizeof(struct gpa_range) + pfncount * sizeof(u64); msgheader = kzalloc(msgsize, GFP_KERNEL); if (!msgheader) - goto nomem; + return -ENOMEM; INIT_LIST_HEAD(&msgheader->submsglist); msgheader->msgsize = msgsize; @@ -356,18 +352,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pfnsum = pfncount; pfnleft = pagecount - pfncount; - /* how many pfns can we fit */ + /* how many pfns can we fit in a body message */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_body); pfncount = pfnsize / sizeof(u64); - /* fill in the body */ + /* + * If pfnleft is zero, everything fits in the header and no body + * messages are needed + */ while (pfnleft) { - if (pfnleft > pfncount) - pfncurr = pfncount; - else - pfncurr = pfnleft; - + pfncurr = umin(pfncount, pfnleft); msgsize = sizeof(struct vmbus_channel_msginfo) + sizeof(struct vmbus_channel_gpadl_body) + pfncurr * sizeof(u64); @@ -386,8 +381,8 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, list_del(&pos->msglistentry); kfree(pos); } - - goto nomem; + kfree(msgheader); + return -ENOMEM; } msgbody->msgsize = msgsize; @@ -410,37 +405,8 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pfnsum += pfncurr; pfnleft -= pfncurr; } - } else { - /* everything fits in a header */ - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pagecount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (msgheader == NULL) - goto nomem; - - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pagecount; i++) - 
gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, kbuffer, size, send_offset, i); - - *msginfo = msgheader; - } return 0; -nomem: - kfree(msgheader); - kfree(msgbody); - return -ENOMEM; } /* -- cgit v1.2.3 From 9645e74414fb725b0305f4f03035b68207659007 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 11 Jan 2024 08:54:51 -0800 Subject: Drivers: hv: vmbus: Update indentation in create_gpadl_header() A previous commit left the indentation in create_gpadl_header() unchanged for ease of review. Update the indentation and remove line wrap in two places where it is no longer necessary. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240111165451.269418-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240111165451.269418-2-mhklinux@outlook.com> --- drivers/hv/channel.c | 142 +++++++++++++++++++++++++-------------------------- 1 file changed, 70 insertions(+), 72 deletions(-) (limited to 'drivers') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 604f5aff8502..adbf674355b2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -327,85 +327,83 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, sizeof(struct gpa_range); pfncount = umin(pagecount, pfnsize / sizeof(u64)); - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pfncount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (!msgheader) - return -ENOMEM; - - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pfncount; i++) - gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, 
kbuffer, size, send_offset, i); - *msginfo = msgheader; - - pfnsum = pfncount; - pfnleft = pagecount - pfncount; - - /* how many pfns can we fit in a body message */ - pfnsize = MAX_SIZE_CHANNEL_MESSAGE - - sizeof(struct vmbus_channel_gpadl_body); - pfncount = pfnsize / sizeof(u64); + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_header) + + sizeof(struct gpa_range) + pfncount * sizeof(u64); + msgheader = kzalloc(msgsize, GFP_KERNEL); + if (!msgheader) + return -ENOMEM; - /* - * If pfnleft is zero, everything fits in the header and no body - * messages are needed - */ - while (pfnleft) { - pfncurr = umin(pfncount, pfnleft); - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_body) + - pfncurr * sizeof(u64); - msgbody = kzalloc(msgsize, GFP_KERNEL); - - if (!msgbody) { - struct vmbus_channel_msginfo *pos = NULL; - struct vmbus_channel_msginfo *tmp = NULL; - /* - * Free up all the allocated messages. - */ - list_for_each_entry_safe(pos, tmp, - &msgheader->submsglist, - msglistentry) { - - list_del(&pos->msglistentry); - kfree(pos); - } - kfree(msgheader); - return -ENOMEM; - } + INIT_LIST_HEAD(&msgheader->submsglist); + msgheader->msgsize = msgsize; + + gpadl_header = (struct vmbus_channel_gpadl_header *) + msgheader->msg; + gpadl_header->rangecount = 1; + gpadl_header->range_buflen = sizeof(struct gpa_range) + + pagecount * sizeof(u64); + gpadl_header->range[0].byte_offset = 0; + gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); + for (i = 0; i < pfncount; i++) + gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( + type, kbuffer, size, send_offset, i); + *msginfo = msgheader; + + pfnsum = pfncount; + pfnleft = pagecount - pfncount; + + /* how many pfns can we fit in a body message */ + pfnsize = MAX_SIZE_CHANNEL_MESSAGE - + sizeof(struct vmbus_channel_gpadl_body); + pfncount = pfnsize / sizeof(u64); - msgbody->msgsize = msgsize; - gpadl_body = - (struct vmbus_channel_gpadl_body 
*)msgbody->msg; + /* + * If pfnleft is zero, everything fits in the header and no body + * messages are needed + */ + while (pfnleft) { + pfncurr = umin(pfncount, pfnleft); + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_body) + + pfncurr * sizeof(u64); + msgbody = kzalloc(msgsize, GFP_KERNEL); + if (!msgbody) { + struct vmbus_channel_msginfo *pos = NULL; + struct vmbus_channel_msginfo *tmp = NULL; /* - * Gpadl is u32 and we are using a pointer which could - * be 64-bit - * This is governed by the guest/host protocol and - * so the hypervisor guarantees that this is ok. + * Free up all the allocated messages. */ - for (i = 0; i < pfncurr; i++) - gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, - kbuffer, size, send_offset, pfnsum + i); - - /* add to msg header */ - list_add_tail(&msgbody->msglistentry, - &msgheader->submsglist); - pfnsum += pfncurr; - pfnleft -= pfncurr; + list_for_each_entry_safe(pos, tmp, + &msgheader->submsglist, + msglistentry) { + + list_del(&pos->msglistentry); + kfree(pos); + } + kfree(msgheader); + return -ENOMEM; } + msgbody->msgsize = msgsize; + gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg; + + /* + * Gpadl is u32 and we are using a pointer which could + * be 64-bit + * This is governed by the guest/host protocol and + * so the hypervisor guarantees that this is ok. + */ + for (i = 0; i < pfncurr; i++) + gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, + kbuffer, size, send_offset, pfnsum + i); + + /* add to msg header */ + list_add_tail(&msgbody->msglistentry, &msgheader->submsglist); + pfnsum += pfncurr; + pfnleft -= pfncurr; + } + return 0; } -- cgit v1.2.3 From ec5c54a9d3c4f9c15e647b049fea401ee5258696 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Feb 2024 18:25:49 +0100 Subject: gpio: fix resource unwinding order in error path Hogs are added *after* ACPI so should be removed *before* in error path. 
Fixes: a411e81e61df ("gpiolib: add hogs support for machine code") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/gpio/gpiolib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 58a839c1ae48..75be4a3ca7f8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -968,11 +968,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -997,11 +997,11 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: gpiochip_free_valid_mask(gc); -- cgit v1.2.3 From 1eecc7ab82c42133b748e1895275942a054a7f67 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: net: lan78xx: fix runtime PM count underflow on link stop Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). So far, it was working only because this driver does not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open.
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ba6c8ac2a736..d2aa2c5b1989 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3135,7 +3135,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } -- cgit v1.2.3 From eb2c11b27c58a62b5027b77f702c15cd0ca38f7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2024 17:06:56 +0100 Subject: net: bql: fix building with BQL disabled It is now possible to disable BQL, but that causes the cpsw driver to break: drivers/net/ethernet/ti/am65-cpsw-nuss.c:297:28: error: no member named 'dql' in 'struct netdev_queue' 297 | dql_avail(&netif_txq->dql), There is already a helper function in net/sch_generic.h that could be used to help here. Move its implementation into the common linux/netdevice.h along with the other bql interfaces and change both users over to the new interface. Fixes: ea7f3cfaa588 ("net: bql: allow the config to be disabled") Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- include/linux/netdevice.h | 10 ++++++++++ include/net/sch_generic.h | 7 +------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9d2f4ac783e4..2939a21ca74f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -294,7 +294,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, txqueue, netif_tx_queue_stopped(netif_txq), jiffies_to_msecs(jiffies - trans_start), - dql_avail(&netif_txq->dql), + netdev_queue_dql_avail(netif_txq), k3_cppi_desc_pool_avail(tx_chn->desc_pool)); if (netif_tx_queue_stopped(netif_txq)) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9c973b92294..735a9386fcf8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3499,6 +3499,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 934fdb977551..cefe0c4bdae3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { -#ifdef CONFIG_BQL - /* Non-BQL migrated drivers will return 0, too. 
*/ - return dql_avail(&txq->dql); -#else - return 0; -#endif + return netdev_queue_dql_avail(txq); } struct Qdisc_class_ops { -- cgit v1.2.3 From c0afb6b88fbbc177fa322a835f874be217bffe45 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Feb 2024 17:13:16 +0800 Subject: crypto: rk3288 - Fix use after free in unprepare The unprepare call must be carried out before the finalize call as the latter can free the request. Fixes: c66c17a0f69b ("crypto: rk3288 - Remove prepare/unprepare request") Reported-by: Andrey Skvortsov Cc: Signed-off-by: Herbert Xu Reviewed-by: Andrey Skvortsov Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 1b13b4aa16ec..a235e6c300f1 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -332,12 +332,12 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) theend: pm_runtime_put_autosuspend(rkc->dev); + rk_hash_unprepare(engine, breq); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); - rk_hash_unprepare(engine, breq); - return 0; } -- cgit v1.2.3 From 6384c56c99d98c84afea5b9ec7029a6e153ae431 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 27 Feb 2024 06:48:21 +0000 Subject: iommu/sva: Fix SVA handle sharing in multi device case iommu_sva_bind_device will directly goto out in multi-device case when found existing domain, ignoring list_add handle, which causes the handle to fail to be shared. 
Fixes: 65d4418c5002 ("iommu/sva: Restore SVA handle sharing") Signed-off-by: Zhangfei Gao Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240227064821.128-1-zhangfei.gao@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 7f91c8d0064b..65814cbc8402 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -117,11 +117,11 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm if (ret) goto out_free_domain; domain->users = 1; - refcount_set(&handle->users, 1); list_add(&domain->next, &mm->iommu_mm->sva_domains); - list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); out: + refcount_set(&handle->users, 1); + list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; handle->domain = domain; -- cgit v1.2.3 From 8e0ef412869430d114158fc3b9b1fb111e247bd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Feb 2024 14:56:42 -0800 Subject: dm: use queue_limits_set Use queue_limits_set which validates the limits and takes care of updating the readahead settings instead of directly assigning them to the queue. For that make sure all limits are actually updated before the assignment. 
Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20240228225653.947152-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- drivers/md/dm-table.c | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/block/blk-settings.c b/block/blk-settings.c index 865fe4ebbf9b..13865a9f8972 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -267,7 +267,7 @@ int queue_limits_commit_update(struct request_queue *q, EXPORT_SYMBOL_GPL(queue_limits_commit_update); /** - * queue_limits_commit_set - apply queue limits to queue + * queue_limits_set - apply queue limits to queue * @q: queue to update * @lim: limits to apply * diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 41f1d731ae5a..88114719fe18 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, bool wc = false, fua = false; int r; - /* - * Copy table's limits to the DM device's request_queue - */ - q->limits = *limits; - if (dm_table_supports_nowait(t)) blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); else blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); if (!dm_table_supports_discards(t)) { - q->limits.max_discard_sectors = 0; - q->limits.max_hw_discard_sectors = 0; - q->limits.discard_granularity = 0; - q->limits.discard_alignment = 0; - q->limits.discard_misaligned = 0; + limits->max_hw_discard_sectors = 0; + limits->discard_granularity = 0; + limits->discard_alignment = 0; + limits->discard_misaligned = 0; } + if (!dm_table_supports_write_zeroes(t)) + limits->max_write_zeroes_sectors = 0; + if (!dm_table_supports_secure_erase(t)) - q->limits.max_secure_erase_sectors = 0; + limits->max_secure_erase_sectors = 0; + + r = queue_limits_set(q, limits); + if (r) + return r; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; @@ -2007,9 +2008,6 @@ int 
dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - if (!dm_table_supports_write_zeroes(t)) - q->limits.max_write_zeroes_sectors = 0; - dm_table_verify_integrity(t); /* @@ -2047,7 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } dm_update_crypto_profile(q, t); - disk_update_readahead(t->md->disk); /* * Check for request-based device is left to -- cgit v1.2.3 From 9d2b6fa09d15d021fb83ec6f1336176ebaebbeec Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Mar 2024 08:37:46 -0600 Subject: RAS: Export helper to get ras_debugfs_dir Export a getter instead of the debugfs node directly so that, other in-tree-only RAS modules can use it. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240301143748.854090-2-yazen.ghannam@amd.com --- drivers/ras/cec.c | 10 ++++++++-- drivers/ras/debugfs.c | 8 +++++++- drivers/ras/debugfs.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 321af498ee11..e440b15fbabc 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -480,9 +480,15 @@ DEFINE_SHOW_ATTRIBUTE(array); static int __init create_debugfs_nodes(void) { - struct dentry *d, *pfn, *decay, *count, *array; + struct dentry *d, *pfn, *decay, *count, *array, *dfs; - d = debugfs_create_dir("cec", ras_debugfs_dir); + dfs = ras_get_debugfs_root(); + if (!dfs) { + pr_warn("Error getting RAS debugfs root!\n"); + return -1; + } + + d = debugfs_create_dir("cec", dfs); if (!d) { pr_warn("Error creating cec debugfs node!\n"); return -1; diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c index ffb973c328e3..42afd3de68b2 100644 --- a/drivers/ras/debugfs.c +++ b/drivers/ras/debugfs.c @@ -3,10 +3,16 @@ #include #include "debugfs.h" -struct dentry *ras_debugfs_dir; +static struct dentry *ras_debugfs_dir; static atomic_t trace_count = 
ATOMIC_INIT(0); +struct dentry *ras_get_debugfs_root(void) +{ + return ras_debugfs_dir; +} +EXPORT_SYMBOL_GPL(ras_get_debugfs_root); + int ras_userspace_consumers(void) { return atomic_read(&trace_count); diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h index c07443b462ad..4749ccdeeba1 100644 --- a/drivers/ras/debugfs.h +++ b/drivers/ras/debugfs.h @@ -4,6 +4,6 @@ #include -extern struct dentry *ras_debugfs_dir; +struct dentry *ras_get_debugfs_root(void); #endif /* __RAS_DEBUGFS_H__ */ -- cgit v1.2.3 From 838850c50884cdd1c96fce1063ef918c394d4bdc Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 1 Mar 2024 08:37:47 -0600 Subject: RAS/AMD/FMPM: Save SPA values The system physical address (SPA) of an error is not a stable value. It will change depending on the location of the memory: parts can be swapped. And it will change depending on memory topology: NUMA nodes and/or interleaving can be adjusted. Therefore, the SPA value is not part of the "FRU Memory Poison" record format. And it will not be saved to persistent storage. However, the SPA values can be helpful during debug and for system admins during run time. Save the SPA values in a separate structure. This is updated when records are restored and when new errors are saved. [ bp: Make error messages more user friendly and add and correct comments. 
] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240301143748.854090-3-yazen.ghannam@amd.com --- drivers/ras/amd/fmpm.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 80dd112b720a..8c3188488673 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -111,6 +111,11 @@ struct fru_rec { */ static struct fru_rec **fru_records; +/* system physical addresses array */ +static u64 *spa_entries; + +#define INVALID_SPA ~0ULL + #define CPER_CREATOR_FMP \ GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ 0xa0, 0x33, 0x08, 0x75) @@ -120,7 +125,7 @@ static struct fru_rec **fru_records; 0x12, 0x0a, 0x44, 0x58) /** - * DOC: fru_poison_entries (byte) + * DOC: max_nr_entries (byte) * Maximum number of descriptor entries possible for each FRU. * * Values between '1' and '255' are valid. @@ -140,6 +145,9 @@ static unsigned int max_nr_fru; /* Total length of record including headers and list of descriptor entries. */ static size_t max_rec_len; +/* Total number of SPA entries across all FRUs. */ +static unsigned int spa_nr_entries; + /* * Protect the local records cache in fru_records and prevent concurrent * writes to storage. 
This is only needed after init once notifier block @@ -269,6 +277,54 @@ static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) return false; } +static void save_spa(struct fru_rec *rec, unsigned int entry, + u64 addr, u64 id, unsigned int cpu) +{ + unsigned int i, fru_idx, spa_entry; + struct atl_err a_err; + unsigned long spa; + + if (entry >= max_nr_entries) { + pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n", + entry, max_nr_entries); + return; + } + + /* spa_nr_entries is always multiple of max_nr_entries */ + for (i = 0; i < spa_nr_entries; i += max_nr_entries) { + fru_idx = i / max_nr_entries; + if (fru_records[fru_idx] == rec) + break; + } + + if (i >= spa_nr_entries) { + pr_warn_once("FRU record %d not found\n", i); + return; + } + + spa_entry = i + entry; + if (spa_entry >= spa_nr_entries) { + pr_warn_once("spa_entries[] index out-of-bounds\n"); + return; + } + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(spa)) { + pr_debug("Failed to get system address\n"); + return; + } + + spa_entries[spa_entry] = spa; + pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n", + fru_idx, entry, spa_entry, spa_entries[spa_entry]); +} + static void update_fru_record(struct fru_rec *rec, struct mce *m) { struct cper_sec_fru_mem_poison *fmp = &rec->fmp; @@ -301,6 +357,7 @@ static void update_fru_record(struct fru_rec *rec, struct mce *m) entry = fmp->nr_entries; save_fpd: + save_spa(rec, entry, m->addr, m->ipid, m->extcpu); fpd_dest = &rec->entries[entry]; memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); @@ -385,6 +442,7 @@ static void retire_mem_fmp(struct fru_rec *rec) continue; retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); + save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu); } } @@ -696,6 +754,8 @@ static int get_system_info(void) if (!max_nr_entries) max_nr_entries = 
FMPM_DEFAULT_MAX_NR_ENTRIES; + spa_nr_entries = max_nr_fru * max_nr_entries; + max_rec_len = sizeof(struct fru_rec); max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; @@ -714,6 +774,7 @@ static void free_records(void) kfree(rec); kfree(fru_records); + kfree(spa_entries); } static int allocate_records(void) @@ -734,6 +795,15 @@ static int allocate_records(void) } } + spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL); + if (!spa_entries) { + ret = -ENOMEM; + goto out_free; + } + + for (i = 0; i < spa_nr_entries; i++) + spa_entries[i] = INVALID_SPA; + return ret; out_free: -- cgit v1.2.3 From 7d19eea51757ad72faf4b0493e5bde85ca62012e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 1 Mar 2024 08:37:48 -0600 Subject: RAS/AMD/FMPM: Add debugfs interface to print record entries It is helpful to see the saved record entries during run time in human-readable format. This is useful for testing during module development. It can also be used by system admins to quickly and easily see the state of the system. Provide a sequential file in debugfs to print fields of interest from the FRU records and their entries. Don't fail to load the module if the debugfs interface is not available. This is a convenience feature which does not affect other module functionality. The new interface reads the record entries and should hold the mutex. Expand the mutex code comment to clarify when it should be held. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240301143748.854090-4-yazen.ghannam@amd.com --- drivers/ras/amd/fmpm.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'drivers') diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 8c3188488673..0963c9e7b853 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -54,6 +54,8 @@ #include #include +#include "../debugfs.h" + #define INVALID_CPU UINT_MAX /* Validation Bits */ @@ -116,6 +118,9 @@ static u64 *spa_entries; #define INVALID_SPA ~0ULL +static struct dentry *fmpm_dfs_dir; +static struct dentry *fmpm_dfs_entries; + #define CPER_CREATOR_FMP \ GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ 0xa0, 0x33, 0x08, 0x75) @@ -152,6 +157,11 @@ static unsigned int spa_nr_entries; * Protect the local records cache in fru_records and prevent concurrent * writes to storage. This is only needed after init once notifier block * registration is done. + * + * The majority of a record is fixed at module init and will not change + * during run time. The entries within a record will be updated as new + * errors are reported. The mutex should be held whenever the entries are + * accessed during run time. 
*/ static DEFINE_MUTEX(fmpm_update_mutex); @@ -815,6 +825,124 @@ out: return ret; } +static void *fmpm_start(struct seq_file *f, loff_t *pos) +{ + if (*pos >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos) +{ + if (++(*pos) >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void fmpm_stop(struct seq_file *f, void *data) +{ +} + +#define SHORT_WIDTH 8 +#define U64_WIDTH 18 +#define TIMESTAMP_WIDTH 19 +#define LONG_WIDTH 24 +#define U64_PAD (LONG_WIDTH - U64_WIDTH) +#define TS_PAD (LONG_WIDTH - TIMESTAMP_WIDTH) +static int fmpm_show(struct seq_file *f, void *data) +{ + unsigned int fru_idx, entry, spa_entry, line; + struct cper_fru_poison_desc *fpd; + struct fru_rec *rec; + + line = *(loff_t *)data; + if (line == 0) { + seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx"); + seq_printf(f, "%-*s", LONG_WIDTH, "fru_id"); + seq_printf(f, "%-*s", SHORT_WIDTH, "entry"); + seq_printf(f, "%-*s", LONG_WIDTH, "timestamp"); + seq_printf(f, "%-*s", LONG_WIDTH, "hw_id"); + seq_printf(f, "%-*s", LONG_WIDTH, "addr"); + seq_printf(f, "%-*s", LONG_WIDTH, "spa"); + goto out_newline; + } + + spa_entry = line - 1; + fru_idx = spa_entry / max_nr_entries; + entry = spa_entry % max_nr_entries; + + rec = fru_records[fru_idx]; + if (!rec) + goto out; + + seq_printf(f, "%-*u", SHORT_WIDTH, fru_idx); + seq_printf(f, "0x%016llx%-*s", rec->fmp.fru_id, U64_PAD, ""); + seq_printf(f, "%-*u", SHORT_WIDTH, entry); + + mutex_lock(&fmpm_update_mutex); + + if (entry >= rec->fmp.nr_entries) { + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + goto out_unlock; + } + + fpd = &rec->entries[entry]; + + seq_printf(f, "%ptT%-*s", &fpd->timestamp, TS_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->hw_id, U64_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->addr, U64_PAD, ""); + + if (spa_entries[spa_entry] == 
INVALID_SPA) + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + else + seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, ""); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +out_newline: + seq_putc(f, '\n'); +out: + return 0; +} + +static const struct seq_operations fmpm_seq_ops = { + .start = fmpm_start, + .next = fmpm_next, + .stop = fmpm_stop, + .show = fmpm_show, +}; + +static int fmpm_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fmpm_seq_ops); +} + +static const struct file_operations fmpm_fops = { + .open = fmpm_open, + .release = seq_release, + .read = seq_read, + .llseek = seq_lseek, +}; + +static void setup_debugfs(void) +{ + struct dentry *dfs = ras_get_debugfs_root(); + + if (!dfs) + return; + + fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs); + if (!fmpm_dfs_dir) + return; + + fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops); + if (!fmpm_dfs_entries) + debugfs_remove(fmpm_dfs_dir); +} + static const struct x86_cpu_id fmpm_cpuids[] = { X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), { } @@ -856,6 +984,8 @@ static int __init fru_mem_poison_init(void) if (ret) goto out_free; + setup_debugfs(); + retire_mem_records(); mce_register_decode_chain(&fru_mem_poison_nb); @@ -872,6 +1002,7 @@ out: static void __exit fru_mem_poison_exit(void) { mce_unregister_decode_chain(&fru_mem_poison_nb); + debugfs_remove(fmpm_dfs_dir); free_records(); } -- cgit v1.2.3 From eabf5dfc2d6048d8415cd22d38d7d3e0bdb4dff9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:44:08 -0800 Subject: pktcdvd: don't set max_hw_sectors on the underlying device pktcdvd sets max_hw_sectors on the queue of the underlying device that it doesn't own (and doesn't reset it ever) since the driver was merged. This can create all kinds of problems as the underlying driver doesn't even know about it changing the limit. 
As the stated purpose is to not create I/Os larger than a single frame, and pktcdvd never builds bios larger than that, just set REQ_NOMERGE on the bios it submits so that larger I/Os never get built. Note: I don't have packet writing hardware, so this is compile tested only. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229144408.1047967-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 12fcc881b04f..9071c4ebc1b9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -828,6 +828,12 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, */ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) { + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + bio->bi_opf |= REQ_NOMERGE; + spin_lock(&pd->iosched.lock); if (bio_data_dir(bio) == READ) bio_list_add(&pd->iosched.read_queue, bio); @@ -2191,11 +2197,6 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) ret = pkt_open_write(pd); if (ret) goto out_putdev; - /* - * Some CDRW drives can not handle writes larger than one packet, - * even if the size is a multiple of the packet size. - */ - blk_queue_max_hw_sectors(q, pd->settings.size); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); -- cgit v1.2.3 From 7ea201f2cc1da999b9a0a23ea20b64eb2c4719a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:44 -0800 Subject: nbd: don't clear discard_sectors in nbd_config_put nbd_config_put currently clears discard_sectors when unusing a device. This is pretty odd behavior and different from the sector size configuration which is simply left in place and then reconfigured when nbd_set_size is called as part of configuring the device.
Change nbd_set_size to clear discard_sectors if discard is not supported so that all the queue limit changes are handled in one place. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9ee9587375fa..384750d5259f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -336,6 +336,8 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (nbd->config->flags & NBD_FLAG_SEND_TRIM) blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + else + blk_queue_max_discard_sectors(nbd->disk->queue, 0); blk_queue_logical_block_size(nbd->disk->queue, blksize); blk_queue_physical_block_size(nbd->disk->queue, blksize); @@ -1351,7 +1353,6 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - blk_queue_max_discard_sectors(nbd->disk->queue, 0); mutex_unlock(&nbd->config_lock); nbd_put(nbd); -- cgit v1.2.3 From 242a49e5c8784e93a99e4dc4277b28a8ba85eac5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:45 -0800 Subject: nbd: freeze the queue for queue limits updates nbd currently updates the logical and physical block sizes as well as the discard_sectors on a live queue. Freeze the queue first to make sure there are no commands in flight that can see torn or inconsistent limits.
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 384750d5259f..22ee0ed9aa6d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -316,7 +316,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, nsock->sent = 0; } -static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, +static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize) { if (!blksize) @@ -348,6 +348,18 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, return 0; } +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) +{ + int error; + + blk_mq_freeze_queue(nbd->disk->queue); + error = __nbd_set_size(nbd, bytesize, blksize); + blk_mq_unfreeze_queue(nbd->disk->queue); + + return error; +} + static void nbd_complete_rq(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); -- cgit v1.2.3 From 268283244c0f018dec8bf4a9c69ce50684561f46 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:46 -0800 Subject: nbd: use the atomic queue limits API in nbd_set_size Use queue_limits_start_update / queue_limits_commit_update to update all the limits in one go and with proper sanity checking. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 22ee0ed9aa6d..9d4ec9273bf9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -319,6 +319,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize) { + struct queue_limits lim; + int error; + if (!blksize) blksize = 1u << NBD_DEF_BLKSIZE_BITS; @@ -334,12 +337,16 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (!nbd->pid) return 0; + lim = queue_limits_start_update(nbd->disk->queue); if (nbd->config->flags & NBD_FLAG_SEND_TRIM) - blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + lim.max_hw_discard_sectors = UINT_MAX; else - blk_queue_max_discard_sectors(nbd->disk->queue, 0); - blk_queue_logical_block_size(nbd->disk->queue, blksize); - blk_queue_physical_block_size(nbd->disk->queue, blksize); + lim.max_hw_discard_sectors = 0; + lim.logical_block_size = blksize; + lim.physical_block_size = blksize; + error = queue_limits_commit_update(nbd->disk->queue, &lim); + if (error) + return error; if (max_part) set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); -- cgit v1.2.3 From cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. 
Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 +++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bd541527c8c7..99876b765b08 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2939,8 +2939,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10524,6 +10524,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + 
case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10540,6 +10578,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10548,9 +10591,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10578,9 +10618,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10589,6 +10626,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. 
*/ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** -- cgit v1.2.3 From d562b11c1eac7d73f4c778b4cbe5468f86b1f20d Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: i40e: disable NAPI right after disabling irqs when handling xsk_pool Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 54eb55464e31..89a3401d20ab 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13560,9 +13560,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); -- cgit v1.2.3 From 99099c6bc75a30b76bb5d6774a0509ab6f06af05 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:53 +0100 Subject: ice: reorder disabling IRQ and NAPI in ice_qp_dis ice_qp_dis() currently does things in very mixed way. 
Tx is stopped before disabling IRQ on related queue vector, then it takes care of disabling Rx and finally NAPI is disabled. Let us start with disabling IRQs in the first place followed by turning off NAPI. Then it is safe to handle queues. One subtle change on top of that is that even though ice_qp_ena() looks more sane, clear ICE_CFG_BUSY as the last thing there. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8b81a1677045..2eecd0f39aa6 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -179,6 +179,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -195,13 +199,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -259,11 +260,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) return err; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); return 0; } -- cgit v1.2.3 From 4035c72dc1ba81a96f94de84dfd5409056c1d9c9 Mon Sep 17 00:00:00 2001 From: 
Michal Swiatkowski Date: Fri, 23 Feb 2024 07:40:24 +0100 Subject: ice: reconfig host after changing MSI-X on VF During VSI reconfiguration, the filters and VSI config that are set in ice_vf_init_host_cfg() are lost. Call the host configuration function again to restore them. Without this config, a VF whose MSI-X amount was changed might have connection problems. Fixes: 4d38cb44bd32 ("ice: manage VFs MSI-X using resource tracking") Reviewed-by: Jacob Keller Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a94a1c48c3de..b0f78c2f2790 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1068,6 +1068,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) struct ice_pf *pf = pci_get_drvdata(pdev); u16 prev_msix, prev_queues, queues; bool needs_rebuild = false; + struct ice_vsi *vsi; struct ice_vf *vf; int id; @@ -1102,6 +1103,10 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (!vf) return -ENOENT; + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -ENOENT; + prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; @@ -1122,7 +1127,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (vf->first_vector_idx < 0) goto unroll; - if (ice_vf_reconfig_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf) || ice_vf_init_host_cfg(vf, vsi)) { /* Try to rebuild with previous values */ needs_rebuild = true; goto unroll; @@ -1148,8 +1153,10 @@ unroll: if (vf->first_vector_idx < 0) return -EINVAL; - if (needs_rebuild) + if (needs_rebuild) { ice_vf_reconfig_vsi(vf); + ice_vf_init_host_cfg(vf, vsi); + } ice_ena_vf_mappings(vf); ice_put_vf(vf); -- cgit v1.2.3 From
7cb50f6c9fbaa1c0b80100b8971bf13db5d75d06 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 23 Feb 2024 21:24:35 -0800 Subject: of: property: fw_devlink: Fix stupid bug in remote-endpoint parsing Introduced a stupid bug in commit 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") due to a last minute incorrect edit of "index !=0" into "!index". This patch fixes it to be "index > 0" to match the comment right next to it. Reported-by: Luca Ceresoli Link: https://lore.kernel.org/lkml/20240223171849.10f9901d@booty/ Fixes: 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") Signed-off-by: Saravana Kannan Reviewed-by: Herve Codina Reviewed-by: Luca Ceresoli Tested-by: Luca Ceresoli Link: https://lore.kernel.org/r/20240224052436.3552333-1-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/of/property.c b/drivers/of/property.c index b71267c6667c..fa8cd33be131 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1304,7 +1304,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ - if (!index || strcmp(prop_name, "remote-endpoint")) + if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); -- cgit v1.2.3 From 8deeefb24786ea7950b37bde4516b286c877db00 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 19 Oct 2023 04:49:54 +0300 Subject: Revert "net/mlx5: Block entering switchdev mode with ns inconsistency" This reverts commit 662404b24a4c4d839839ed25e3097571f5938b9b. The revert is required due to the suspicion that it is not good for anything and causes a crash.
Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 23 ---------------------- 1 file changed, 23 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0455134c98e..14b3bd3c5e2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3658,22 +3658,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - struct net *devl_net, *netdev_net; - bool ret = false; - - mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); - if (dev->mlx5e_res.uplink_netdev) { - netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); - ret = net_eq(devl_net, netdev_net); - } - mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); - return ret; -} - int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -3718,13 +3702,6 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && - !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { - NL_SET_ERR_MSG_MOD(extack, - "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); - return -EPERM; - } - mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { -- cgit v1.2.3 From b7bbd698c90591546d22093181e266785f08c18b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 13 Dec 2023 17:07:08 -0800 Subject: Revert "net/mlx5e: Check the number of elements before walk TC 
rhashtable" This reverts commit 4e25b661f484df54b6751b65f9ea2434a3b67539. This commit was mistakenly applied by pulling the wrong tag; remove it. Fixes: 4e25b661f484 ("net/mlx5e: Check the number of elements before walk TC rhashtable") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 190f10aba170..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + if (!rpriv || !rpriv->netdev) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); -- cgit v1.2.3 From 85ea2c5c5ef5f24fe6e6e7028ddd90be1cb5d27e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 11 Jan 2024 01:27:47 +0000 Subject: net/mlx5: E-switch, Change flow rule destination checking The checking in the cited commit is not accurate. In the common case, VF destination is internal, and uplink destination is external. However, uplink destination with packet reformat is considered as internal because firmware uses LB+hairpin to support it. Update the checking so header rewrite rules with both internal and external destinations are not allowed.
Fixes: e0e22d59b47a ("net/mlx5: E-switch, Add checking for flow rule destinations") Signed-off-by: Jianbo Liu Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 23 +++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 14b3bd3c5e2f..baaae628b0a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,21 +535,26 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) } static bool -esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest) { - bool vf_dest = false, pf_dest = false; + bool internal_dest = false, external_dest = false; int i; for (i = 0; i < max_dest; i++) { - if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT && + dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK) continue; - if (dests[i].vport.num == MLX5_VPORT_UPLINK) - pf_dest = true; + /* Uplink dest is external, but considered as internal + * if there is reformat because firmware uses LB+hairpin to support it. 
+ */ + if (dests[i].vport.num == MLX5_VPORT_UPLINK && + !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) + external_dest = true; else - vf_dest = true; + internal_dest = true; - if (vf_dest && pf_dest) + if (internal_dest && external_dest) return true; } @@ -695,9 +700,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, /* Header rewrite with combined wire+loopback in FDB is not allowed */ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && - esw_dests_to_vf_pf_vports(dest, i)) { + esw_dests_to_int_external(dest, i)) { esw_warn(esw->dev, - "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n"); rule = ERR_PTR(-EINVAL); goto err_esw_get; } -- cgit v1.2.3 From ac8082a3c7a158640a2c493ec437dd9da881a6a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 16 Jan 2024 20:13:34 +0200 Subject: net/mlx5: Fix fw reporter diagnose output Restore fw reporter diagnose to print the syndrome even if it is zero. Following the cited commit, in this case (syndrome == 0) command returns no output at all. 
This fix restores command output in case syndrome is cleared: $ devlink health diagnose pci/0000:82:00.0 reporter fw Syndrome: 0 Fixes: d17f98bf7cc9 ("net/mlx5: devlink health: use retained error fmsg API") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8ff6dc9bc803..b5c709bba155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -452,10 +452,10 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct health_buffer __iomem *h = health->health; u8 synd = ioread8(&h->synd); + devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); if (!synd) return 0; - devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); return 0; -- cgit v1.2.3 From 5e6107b499f3fc4748109e1d87fd9603b34f1e0d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 28 Jan 2024 20:43:58 +0200 Subject: net/mlx5: Check capability for fw_reset Functions which can't access MFRL (Management Firmware Reset Level) register, have no use of fw_reset structures or events. Remove fw_reset structures allocation and registration for fw reset events notifications for these functions. Having the devlink param enable_remote_dev_reset on functions that don't have this capability is misleading as these functions are not allowed to influence the reset flow. Hence, this patch removes this parameter for such functions. In addition, return not supported on devlink reload action fw_activate for these functions. 
Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Moshe Shemesh Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 22 ++++++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 4 +++- 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3e064234f6fe..98d4306929f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -157,6 +157,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (action == DEVLINK_RELOAD_ACTION_FW_ACTIVATE && + !dev->priv.fw_reset) { + NL_SET_ERR_MSG_MOD(extack, "FW activate is unsupported for this function"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index f27eab6e4929..2911aa34a5be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -703,19 +703,30 @@ void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); mlx5_eq_notifier_register(dev, &fw_reset->nb); } void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) { - mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return; + + mlx5_eq_notifier_unregister(dev, &fw_reset->nb); } void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = 
dev->priv.fw_reset; + if (!fw_reset) + return; + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); @@ -733,9 +744,13 @@ static const struct devlink_param mlx5_fw_reset_devlink_params[] = { int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { - struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + struct mlx5_fw_reset *fw_reset; int err; + if (!MLX5_CAP_MCAM_REG(dev, mfrl)) + return 0; + + fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); if (!fw_reset) return -ENOMEM; fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); @@ -771,6 +786,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + devl_params_unregister(priv_to_devlink(dev), mlx5_fw_reset_devlink_params, ARRAY_SIZE(mlx5_fw_reset_devlink_params)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3fd6310b6da6..486b7492050c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10261,7 +10261,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; u8 regs_31_to_10[0x16]; u8 mtmp[0x1]; -- cgit v1.2.3 From dd238b702064b21d25b4fc39a19699319746d655 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 25 Dec 2023 01:47:05 +0000 Subject: net/mlx5e: Change the warning when ignore_flow_level is not supported Downgrade the print from mlx5_core_warn() to mlx5_core_dbg(), as it is just a statement of fact that firmware doesn't support ignore flow level. And change the wording to "firmware flow level support is missing", to make it more accurate. 
Fixes: ae2ee3be99a8 ("net/mlx5: CT: Remove warning of ignore_flow_level support for VFs") Signed-off-by: Jianbo Liu Suggested-by: Elliott, Robert (Servers) Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 86bf007fd05b..b500cc2c9689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -37,7 +37,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { if (priv->mdev->coredev_type == MLX5_COREDEV_PF) - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + mlx5_core_dbg(priv->mdev, "firmware flow level support is missing\n"); err = -EOPNOTSUPP; goto err_check; } -- cgit v1.2.3 From a71f2147b64941efee156bfda54fd6461d0f95df Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 13 Mar 2023 17:03:03 +0200 Subject: net/mlx5e: Fix MACsec state loss upon state update in offload path The packet number attribute of the SA is incremented by the device rather than the software stack when enabling hardware offload. Because the packet number attribute is managed by the hardware, the software has no insight into the value of the packet number attribute actually written by the device. Previously when MACsec offload was enabled, the hardware object for handling the offload was destroyed when the SA was disabled. Re-enabling the SA would lead to a new hardware object being instantiated. This new hardware object would not have any recollection of the correct packet number for the SA. 
Instead, destroy the flow steering rule when deactivating the SA and recreate it upon reactivation, preserving the original hardware object. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Emeel Hakim Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/macsec.c | 82 ++++++++++++++-------- 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index d4ebd8743114..b2cabd6ab86c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -310,9 +310,9 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } -static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *sa, - bool is_tx, struct net_device *netdev, u32 fs_id) +static void mlx5e_macsec_cleanup_sa_fs(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) { int action = (is_tx) ? 
MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; @@ -322,20 +322,49 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, fs_id); - mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); sa->macsec_rule = NULL; } +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) +{ + mlx5e_macsec_cleanup_sa_fs(macsec, sa, is_tx, netdev, fs_id); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); +} + +static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, bool encrypt, + bool is_tx, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + struct mlx5_macsec_rule_attrs rule_attrs; + union mlx5_macsec_rule *macsec_rule; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? 
MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5_macsec_fs_add_rule(macsec_fs, ctx, &rule_attrs, fs_id); + if (!macsec_rule) + return -ENOMEM; + + sa->macsec_rule = macsec_rule; + + return 0; +} + static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, bool is_tx, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; - struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; - union mlx5_macsec_rule *macsec_rule; int err; obj_attrs.next_pn = sa->next_pn; @@ -357,20 +386,12 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, if (err) return err; - rule_attrs.macsec_obj_id = sa->macsec_obj_id; - rule_attrs.sci = sa->sci; - rule_attrs.assoc_num = sa->assoc_num; - rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : - MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - - macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); - if (!macsec_rule) { - err = -ENOMEM; - goto destroy_macsec_object; + if (sa->active) { + err = mlx5e_macsec_init_sa_fs(ctx, sa, encrypt, is_tx, fs_id); + if (err) + goto destroy_macsec_object; } - sa->macsec_rule = macsec_rule; - return 0; destroy_macsec_object: @@ -526,9 +547,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) goto destroy_sa; macsec_device->tx_sa[assoc_num] = tx_sa; - if (!secy->operational || - assoc_num != tx_sc->encoding_sa || - !tx_sa->active) + if (!secy->operational) goto out; err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); @@ -595,7 +614,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; if (ctx_tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } else { @@ -604,7 +623,7 @@ static int 
mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } out: mutex_unlock(&macsec->lock); @@ -1030,8 +1049,9 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + if (rx_sa->active) + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); rx_sc->rx_sa[assoc_num] = NULL; @@ -1112,8 +1132,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, if (!rx_sa || !rx_sa->macsec_rule) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + mlx5e_macsec_cleanup_sa_fs(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); } } @@ -1124,8 +1144,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, - &rx_sc->sc_xarray_element->fs_id); + err = mlx5e_macsec_init_sa_fs(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -1178,7 +1198,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } for (i = 0; i < MACSEC_NUM_AN; ++i) { @@ -1187,7 +1207,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) continue; if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } -- cgit v1.2.3 From 
b7cf07586c40f926063d4d09f7de28ff82f62b2a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Feb 2024 13:12:28 -0800 Subject: net/mlx5e: Use a memory barrier to enforce PTP WQ xmit submission tracking occurs after populating the metadata_map Just simply reordering the functions mlx5e_ptp_metadata_map_put and mlx5e_ptpsq_track_metadata in the mlx5e_txwqe_complete context is not good enough since both the compiler and CPU are free to reorder these two functions. If reordering does occur, the issue that was supposedly fixed by 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") will be seen. This will lead to NULL pointer dereferences in mlx5e_ptpsq_mark_ts_cqes_undelivered in the NAPI polling context due to the tracking list being populated before the metadata map. Fixes: 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5c166d9d2dca..2fa076b23fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -401,6 +401,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + /* ensure skb is put on metadata_map before tracking the index */ + wmb(); mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { -- cgit v1.2.3 From 90502d433c0e7e5483745a574cb719dd5d05b10c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Thu, 8 Feb 2024 15:09:34 -0800 Subject: net/mlx5e: Switch to using _bh variant of spinlock
API in port timestamping NAPI poll context The NAPI poll context is a softirq context. Do not use normal spinlock API in this context to prevent concurrency issues. Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 078f56a3cbb2..ca05b3252a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -42,9 +42,9 @@ mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metad WARN_ON_ONCE(tracker->inuse); tracker->inuse = true; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_add_tail(&tracker->entry, &list->tracker_list_head); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } static void @@ -54,9 +54,9 @@ mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 me WARN_ON_ONCE(!tracker->inuse); tracker->inuse = false; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_del(&tracker->entry); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) @@ -155,7 +155,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; - spin_lock(&cqe_list->tracker_list_lock); + spin_lock_bh(&cqe_list->tracker_list_lock); list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { struct sk_buff *skb = mlx5e_ptp_metadata_map_lookup(metadata_map, 
pos->metadata_id); @@ -170,7 +170,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, pos->inuse = false; list_del(&pos->entry); } - spin_unlock(&cqe_list->tracker_list_lock); + spin_unlock_bh(&cqe_list->tracker_list_lock); } #define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) -- cgit v1.2.3 From 014bcf41d946b36a8f0b8e9b5d9529efbb822f49 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 29 Feb 2024 14:30:06 -0500 Subject: USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command The isd200 sub-driver in usb-storage uses the HEADS and SECTORS values in the ATA ID information to calculate cylinder and head values when creating a CDB for READ or WRITE commands. The calculation involves division and modulus operations, which will cause a crash if either of these values is 0. While this never happens with a genuine device, it could happen with a flawed or subversive emulation, as reported by the syzbot fuzzer. Protect against this possibility by refusing to bind to the device if either the ATA_ID_HEADS or ATA_ID_SECTORS value in the device's ID information is 0. This requires isd200_Initialization() to return a negative error code when initialization fails; currently it always returns 0 (even when there is an error). Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+28748250ab47a8f04100@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-usb/0000000000003eb868061245ba7f@google.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: PrasannaKumar Muralidharan Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/b1e605ea-333f-4ac0-9511-da04f411763e@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/isd200.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 4e0eef1440b7..300aeef160e7 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = 
-EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } -- cgit v1.2.3 From 672448ccf9b6a676f96f9352cbf91f4d35f4084a Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Wed, 21 Feb 2024 12:53:04 +0100 Subject: tty: serial: imx: Fix broken RS485 When about to transmit the function imx_uart_start_tx is called and in some RS485 configurations this function will call imx_uart_stop_rx. The problem is that imx_uart_stop_rx will enable loopback in order to release the RS485 bus, but when loopback is enabled transmitted data will just be looped to RX. This patch fixes the above problem by not enabling loopback when about to transmit. This driver now works well when used for RS485 half duplex master configurations. Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high") Cc: stable Signed-off-by: Rickard x Andersson Tested-by: Christoph Niedermaier Link: https://lore.kernel.org/r/20240221115304.509811-1-rickaran@axis.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4aa72d5aeafb..e14813250616 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -462,8 +462,7 @@ static void imx_uart_stop_tx(struct uart_port *port) } } -/* called with port.lock taken and irqs off */ -static void imx_uart_stop_rx(struct uart_port *port) +static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback) { struct imx_port *sport = (struct imx_port *)port; u32 ucr1, ucr2, ucr4, uts; @@ -485,7 +484,7 @@ static void imx_uart_stop_rx(struct uart_port *port) /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ if (port->rs485.flags & SER_RS485_ENABLED && port->rs485.flags & SER_RS485_RTS_ON_SEND && - sport->have_rtscts && !sport->have_rtsgpio) { + sport->have_rtscts && !sport->have_rtsgpio 
&& loopback) { uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); uts |= UTS_LOOP; imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); @@ -497,6 +496,16 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } +/* called with port.lock taken and irqs off */ +static void imx_uart_stop_rx(struct uart_port *port) +{ + /* + * Stop RX and enable loopback in order to make sure RS485 bus + * is not blocked. Se comment in imx_uart_probe(). + */ + imx_uart_stop_rx_with_loopback_ctrl(port, true); +} + /* called with port.lock taken and irqs off */ static void imx_uart_enable_ms(struct uart_port *port) { @@ -682,9 +691,14 @@ static void imx_uart_start_tx(struct uart_port *port) imx_uart_rts_inactive(sport, &ucr2); imx_uart_writel(sport, ucr2, UCR2); + /* + * Since we are about to transmit we can not stop RX + * with loopback enabled because that will make our + * transmitted data being just looped to RX. + */ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX) && !port->rs485_rx_during_tx_gpio) - imx_uart_stop_rx(port); + imx_uart_stop_rx_with_loopback_ctrl(port, false); sport->tx_state = WAIT_AFTER_RTS; -- cgit v1.2.3 From e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 22 Feb 2024 11:26:34 -0800 Subject: serial: 8250_dw: Do not reclock if already at correct rate When userspace opens the console, we call set_termios() passing a termios with the console's configured baud rate. Currently this causes dw8250_set_termios() to disable and then re-enable the UART clock at the same frequency as it was originally. This can cause corruption of any concurrent console output. Fix it by skipping the reclocking if we are already at the correct rate. 
Signed-off-by: Peter Collingbourne Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms") Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240222192635.1050502-1-pcc@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 2d1f350a4bea..c1d43f040c43 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, long rate; int ret; - clk_disable_unprepare(d->clk); rate = clk_round_rate(d->clk, newrate); - if (rate > 0) { + if (rate > 0 && p->uartclk != rate) { + clk_disable_unprepare(d->clk); /* * Note that any clock-notifer worker will block in * serial8250_update_uartclk() until we are done. @@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, ret = clk_set_rate(d->clk, newrate); if (!ret) p->uartclk = rate; + clk_prepare_enable(d->clk); } - clk_prepare_enable(d->clk); dw8250_do_set_termios(p, termios, old); } -- cgit v1.2.3 From 43066e32227ecde674e8ae1fcdd4a1ede67680c2 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 26 Feb 2024 23:23:51 +0800 Subject: serial: port: Don't suspend if the port is still busy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We accidentally met the issue that the bash prompt is not shown after the previous command is done and until the next input if there's only one CPU (In our issue other CPUs are isolated by isolcpus=). Further analysis shows it's because the port enters runtime suspend even if there's still pending chars in the buffer and the pending chars will only be processed in next device resuming.
We are using amba-pl011 and the problematic flow is like below: Bash                                         kworker tty_write()   file_tty_write()     n_tty_write()       uart_write()         __uart_start()           pm_runtime_get() // wakeup waker             queue_work()                                     pm_runtime_work()                                                rpm_resume()                                                 status = RPM_RESUMING                                                 serial_port_runtime_resume()                                                   port->ops->start_tx()                                                     pl011_tx_chars()                                                       uart_write_wakeup()         […]         __uart_start()           pm_runtime_get() < 0 // because runtime status = RPM_RESUMING                                // later data are not commit to the port driver                                                 status = RPM_ACTIVE                                                 rpm_idle() -> rpm_suspend() This patch tries to fix this by checking the port busy before entering runtime suspending. A runtime_suspend callback is added for the port driver. When entering runtime suspend the callback is invoked, if there's still pending chars in the buffer then flush the buffer. 
Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Cc: stable Reviewed-by: Tony Lindgren Reviewed-by: Andy Shevchenko Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240226152351.40924-1-yangyicong@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_port.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index 88975a4df306..72b6f4f326e2 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -46,8 +46,31 @@ out: return 0; } +static int serial_port_runtime_suspend(struct device *dev) +{ + struct serial_port_device *port_dev = to_serial_base_port_device(dev); + struct uart_port *port = port_dev->port; + unsigned long flags; + bool busy; + + if (port->flags & UPF_DEAD) + return 0; + + uart_port_lock_irqsave(port, &flags); + busy = __serial_port_busy(port); + if (busy) + port->ops->start_tx(port); + uart_port_unlock_irqrestore(port, flags); + + if (busy) + pm_runtime_mark_last_busy(dev); + + return busy ? -EBUSY : 0; +} + static DEFINE_RUNTIME_DEV_PM_OPS(serial_port_pm, - NULL, serial_port_runtime_resume, NULL); + serial_port_runtime_suspend, + serial_port_runtime_resume, NULL); static int serial_port_probe(struct device *dev) { -- cgit v1.2.3 From 1581dafaf0d34bc9c428a794a22110d7046d186d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 29 Feb 2024 17:15:27 -0500 Subject: vt: fix unicode buffer corruption when deleting characters This is the same issue that was fixed for the VGA text buffer in commit 39cdb68c64d8 ("vt: fix memory overlapping when deleting chars in the buffer"). The cure is also the same i.e. replace memcpy() with memmove() due to the overlapping buffers.
Signed-off-by: Nicolas Pitre Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition") Cc: stable Link: https://lore.kernel.org/r/sn184on2-3p0q-0qrq-0218-895349s4753o@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 156efda7c80d..38a765eadbe2 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -381,7 +381,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) u32 *ln = vc->vc_uni_lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; - memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); + memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } -- cgit v1.2.3 From 143667ee9a9cb88b6da7fe6a3d0f32bc33d75d71 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:26 +0200 Subject: nvmet: compare mqes and sqsize only for IO SQ According to the NVMe Spec: " MQES: This field indicates the maximum individual queue size that the controller supports. For NVMe over PCIe implementations, this value applies to the I/O Submission Queues and I/O Completion Queues that the host creates. For NVMe over Fabrics implementations, this value applies to only the I/O Submission Queues that the host creates. " Align the target code to compare mqes and sqsize as mentioned in the NVMe Spec. 
Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d8da840a1c0e..4d014c5d0b6a 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -157,7 +157,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } - if (sqsize > mqes) { + /* for fabrics, this value applies to only the I/O Submission Queues */ + if (qid && sqsize > mqes) { pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n", sqsize, mqes, ctrl->cntlid); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); -- cgit v1.2.3 From 63e8fd6240f08ddf3cffec73dfb90f9594a3a0c6 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:27 +0200 Subject: nvmet: set maxcmd to be per controller This is a preparation for having a dynamic configuration of max queue size for a controller. Make sure that the maxcmd field stays the same as the MQES (+1) value as we do today. 
Reviewed-by: Christoph Hellwig Reviewed-by: Israel Rukshin Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/discovery.c | 2 +- drivers/nvme/target/nvmet.h | 2 +- drivers/nvme/target/passthru.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 39cb570f833d..f5b7054a4a05 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->cqes = (0x4 << 4) | 0x4; /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 68e82ccc0e4e..ce54da8c6b36 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -282,7 +282,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) id->lpa = (1 << 2); /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ if (ctrl->ops->flags & NVMF_KEYED_SGLS) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6c8acebe1a1a..144aca2fa6ad 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -545,7 +545,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, #define NVMET_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 -#define NVMET_MAX_CMD NVMET_QUEUE_SIZE +#define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1) /* * Nice round number that makes a list of nsids fit into a page. 
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index f2d963e1fe94..bb4a69d538fd 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -132,7 +132,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes); id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes); - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); /* don't support fuse commands */ id->fuses = 0; -- cgit v1.2.3 From c82c370dcabefb60eeeb6d4b364e0b88951c3546 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:28 +0200 Subject: nvmet: set ctrl pi_support cap before initializing cap reg This is a preparation for setting the maximal queue size of a controller that supports PI. Reviewed-by: Christoph Hellwig Reviewed-by: Israel Rukshin Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fabrics-cmd.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8658e9c08534..5d50f731c326 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1411,6 +1411,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; nvmet_init_cap(ctrl); WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 4d014c5d0b6a..08e9c6b6f551 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -252,8 +252,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) if (status) goto out; - ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; - uuid_copy(&ctrl->hostid, &d->hostid); ret 
= nvmet_setup_auth(ctrl); -- cgit v1.2.3 From 36144964062b8676ee64281852de2a2c1b193aca Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:29 +0200 Subject: nvme-rdma: introduce NVME_RDMA_MAX_METADATA_QUEUE_SIZE definition This definition will be used by controllers that are configured with metadata support. For now, both regular and metadata controllers have the same maximal queue size but a later commit will increase the maximal queue size for regular RDMA controllers to 256. We'll keep the maximal queue size for metadata controllers to be 128 since there are more resources that are needed for metadata operations and 128 is the optimal size found for metadata controllers based on testing. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 2 ++ include/linux/nvme-rdma.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 3a0f2c170f4c..b3f8416ec803 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -2015,6 +2015,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) { + if (ctrl->pi_support) + return NVME_RDMA_MAX_METADATA_QUEUE_SIZE; return NVME_RDMA_MAX_QUEUE_SIZE; } diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 146dd2223a5f..d0b9941911a1 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -8,7 +8,8 @@ #define NVME_RDMA_IP_PORT 4420 -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, -- cgit v1.2.3 From ad178ba9d90a58229f2d4ef57eb0d5eb37acbed9 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:30 +0200 Subject: nvme-rdma: clamp
queue size according to ctrl cap If a controller is configured with metadata support, clamp the maximal queue size to be 128 since there are more resources that are needed for metadata operations. Otherwise, clamp it to 256. Reviewed-by: Sagi Grimberg Reviewed-by: Israel Rukshin Reviewed-by: Christoph Hellwig Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 20fdd40b1879..366f0bb4ebfc 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) { int ret; bool changed; + u16 max_queue_size; ret = nvme_rdma_configure_admin_queue(ctrl, new); if (ret) @@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); } - if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { + if (ctrl->ctrl.max_integrity_segments) + max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE; + else + max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE; + + if (ctrl->ctrl.sqsize + 1 > max_queue_size) { dev_warn(ctrl->ctrl.device, - "ctrl sqsize %u > max queue size %u, clamping down\n", - ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); - ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; + "ctrl sqsize %u > max queue size %u, clamping down\n", + ctrl->ctrl.sqsize + 1, max_queue_size); + ctrl->ctrl.sqsize = max_queue_size - 1; } if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { -- cgit v1.2.3 From ca2b221d89a81849a2f4a25d024a3d527a210ab6 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:31 +0200 Subject: nvmet: introduce new max queue size configuration entry Using this port configuration, one will be able to set the maximal queue size to be used for any controller that will be associated to the configured 
port. The default value stayed 1024 but each transport will be able to set its own values before enabling the port. Introduce lower limit of 16 for minimal queue depth (same as we use in the host fabrics drivers). Reviewed-by: Christoph Hellwig Reviewed-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Guixin Liu Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 28 ++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 17 +++++++++++++++-- drivers/nvme/target/nvmet.h | 4 +++- 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 2482a0db2504..77a6e817b315 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +static ssize_t nvmet_param_max_queue_size_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size); +} + +static ssize_t nvmet_param_max_queue_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + int ret; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + ret = kstrtoint(page, 0, &port->max_queue_size); + if (ret) { + pr_err("Invalid value '%s' for max_queue_size\n", page); + return -EINVAL; + } + return count; +} + +CONFIGFS_ATTR(nvmet_, param_max_queue_size); + #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_param_pi_enable_show(struct config_item *item, char *page) @@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_trtype, &nvmet_attr_addr_tsas, &nvmet_attr_param_inline_data_size, + &nvmet_attr_param_max_queue_size, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_attr_param_pi_enable, #endif @@
-1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ + port->max_queue_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5d50f731c326..6bbe4df0166c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -358,6 +358,18 @@ int nvmet_enable_port(struct nvmet_port *port) if (port->inline_data_size < 0) port->inline_data_size = 0; + /* + * If the transport didn't set the max_queue_size properly, then clamp + * it to the target limits. Also set default values in case the + * transport didn't set it at all. + */ + if (port->max_queue_size < 0) + port->max_queue_size = NVMET_MAX_QUEUE_SIZE; + else + port->max_queue_size = clamp_t(int, port->max_queue_size, + NVMET_MIN_QUEUE_SIZE, + NVMET_MAX_QUEUE_SIZE); + port->enabled = true; port->tr_ops = ops; return 0; @@ -1223,9 +1235,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ctrl->cap |= (15ULL << 24); /* maximum queue entries supported: */ if (ctrl->ops->get_max_queue_size) - ctrl->cap |= ctrl->ops->get_max_queue_size(ctrl) - 1; + ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl), + ctrl->port->max_queue_size) - 1; else - ctrl->cap |= NVMET_QUEUE_SIZE - 1; + ctrl->cap |= ctrl->port->max_queue_size - 1; if (nvmet_is_passthru_subsys(ctrl->subsys)) nvmet_passthrough_override_cap(ctrl); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 144aca2fa6ad..7c6e7e65b032 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -163,6 +163,7 @@ struct nvmet_port { void *priv; bool enabled; int inline_data_size; + int max_queue_size; const struct nvmet_fabrics_ops *tr_ops; bool pi_enable; }; @@ -543,7 +544,8 @@ void 
nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); -#define NVMET_QUEUE_SIZE 1024 +#define NVMET_MIN_QUEUE_SIZE 16 +#define NVMET_MAX_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1) -- cgit v1.2.3 From f096ba3286f5e773c496cf81667d01f2e8a2a37b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 23 Jan 2024 16:40:32 +0200 Subject: nvmet-rdma: set max_queue_size for RDMA transport A new port configuration was added to set max_queue_size. Clamp user configuration to RDMA transport limits. Increase the maximal queue size of RDMA controllers from 128 to 256 (the default size stays 128 same as before). Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 8 ++++++++ include/linux/nvme-rdma.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b3f8416ec803..f2bb9d95ecf4 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1956,6 +1956,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport) nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; } + if (nport->max_queue_size < 0) { + nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE; + } else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) { + pr_warn("max_queue_size %u is too large, reducing to %u\n", + nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE); + nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE; + } + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, nport->disc_addr.trsvcid, &port->addr); if (ret) { diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index d0b9941911a1..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -8,8 +8,9 
@@ #define NVME_RDMA_IP_PORT 4420 -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_MAX_QUEUE_SIZE 256 #define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, -- cgit v1.2.3 From 4999568184e5d68903e6d0e49609979cd6ef01d7 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 28 Feb 2024 10:37:59 +0800 Subject: nvme-fabrics: check max outstanding commands Maxcmd is mandatory for fabrics, check it early to identify the root cause instead of waiting for it to propagate to "sqsize" and "allocing queue". By the way, change nvme_check_ctrl_fabric_info() to nvmf_validate_identify_ctrl(). Reviewed-by: Chaitanya Kulkarni Signed-off-by: Guixin Liu Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eed3e22e24d9..cb13f7c79eaf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3119,6 +3119,11 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct return -EINVAL; } + if (!ctrl->maxcmd) { + dev_err(ctrl->device, "Maximum outstanding commands is 0\n"); + return -EINVAL; + } + return 0; } -- cgit v1.2.3 From aa707b615ce1551c25c5a3500cca2cf620e36b12 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sun, 4 Feb 2024 13:38:02 -0300 Subject: Drivers: hv: vmbus: make hv_bus const Now that the driver core can properly handle constant struct bus_type, move the hv_bus variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. 
Marliere Reviewed-by: Greg Kroah-Hartman Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240204-bus_cleanup-hv-v1-1-521bd4140673@marliere.net Signed-off-by: Wei Liu Message-ID: <20240204-bus_cleanup-hv-v1-1-521bd4140673@marliere.net> --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b33d5abd9beb..7f7965f3d187 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -988,7 +988,7 @@ static const struct dev_pm_ops vmbus_pm = { }; /* The one and only one */ -static struct bus_type hv_bus = { +static const struct bus_type hv_bus = { .name = "vmbus", .match = vmbus_match, .shutdown = vmbus_shutdown, -- cgit v1.2.3 From dd50f771af20fb02b1aecde04fbd085c872a9139 Mon Sep 17 00:00:00 2001 From: Max Nguyen Date: Sun, 3 Mar 2024 14:13:52 -0800 Subject: Input: xpad - add additional HyperX Controller Identifiers Add additional HyperX device identifiers to xpad_device and xpad_table. 
Suggested-by: Chris Toledanes Reviewed-by: Carl Ng Signed-off-by: Max Nguyen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/44ad5ffa-76d8-4046-94ee-2ef171930ed2@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 7c4b2a5cc1b5..14c828adebf7 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -130,7 +130,12 @@ static const struct xpad_device { { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, + { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ + { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, + { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, + { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ + { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. 
Controller", 0, XTYPE_XBOX }, @@ -463,6 +468,7 @@ static const struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ + XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ -- cgit v1.2.3 From ac3e0384073b2408d6cb0d972fee9fcc3776053d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Feb 2024 20:00:35 +0100 Subject: misc: lis3lv02d_i2c: Fix regulators getting en-/dis-abled twice on suspend/resume When not configured for wakeup lis3lv02d_i2c_suspend() will call lis3lv02d_poweroff() even if the device has already been turned off by the runtime-suspend handler and if configured for wakeup and the device is runtime-suspended at this point then it is not turned back on to serve as a wakeup source. Before commit b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback"), lis3lv02d_poweroff() failed to disable the regulators which as a side effect made calling poweroff() twice ok. Now that poweroff() correctly disables the regulators, doing this twice triggers a WARN() in the regulator core: unbalanced disables for regulator-dummy WARNING: CPU: 1 PID: 92 at drivers/regulator/core.c:2999 _regulator_disable ... Fix lis3lv02d_i2c_suspend() to not call poweroff() a second time if already runtime-suspended and add a poweron() call when necessary to make wakeup work. lis3lv02d_i2c_resume() has similar issues, with an added weirdness that it always powers on the device if it is runtime suspended, after which the first runtime-resume will call poweron() again, causing the enabled count for the regulator to increase by 1 every suspend/resume.
These unbalanced regulator_enable() calls cause the regulator to never be turned off and trigger the following WARN() on driver unbind: WARNING: CPU: 1 PID: 1724 at drivers/regulator/core.c:2396 _regulator_put Fix this by making lis3lv02d_i2c_resume() mirror the new suspend(). Fixes: b1b9f7a49440 ("misc: lis3lv02d_i2c: Add missing setting of the reg_ctrl callback") Reported-by: Paul Menzel Closes: https://lore.kernel.org/regressions/5fc6da74-af0a-4aac-b4d5-a000b39a63a5@molgen.mpg.de/ Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Signed-off-by: Hans de Goede Tested-by: Paul Menzel # Dell XPS 15 7590 Reviewed-by: Paul Menzel Link: https://lore.kernel.org/r/20240220190035.53402-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index c6eb27d46cb0..15119584473c 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -198,8 +198,14 @@ static int lis3lv02d_i2c_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata || !lis3->pdata->wakeup_flags) + /* Turn on for wakeup if turned off by runtime suspend */ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + /* For non wakeup turn off if not already turned off by runtime suspend */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweroff(lis3); + return 0; } @@ -208,13 +214,12 @@ static int lis3lv02d_i2c_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct lis3lv02d *lis3 = i2c_get_clientdata(client); - /* - * pm_runtime documentation says that devices should always - * be powered on at resume. 
Pm_runtime turns them off after system - * wide resume is complete. - */ - if (!lis3->pdata || !lis3->pdata->wakeup_flags || - pm_runtime_suspended(dev)) + /* Turn back off if turned on for wakeup and runtime suspended*/ + if (lis3->pdata && lis3->pdata->wakeup_flags) { + if (pm_runtime_suspended(dev)) + lis3lv02d_poweroff(lis3); + /* For non wakeup turn back on if not runtime suspended */ + } else if (!pm_runtime_suspended(dev)) lis3lv02d_poweron(lis3); return 0; -- cgit v1.2.3 From 95915ba4b987cf2b222b0f251280228a1ff977ac Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 1 Mar 2024 20:07:31 +0530 Subject: tee: optee: Fix kernel panic caused by incorrect error handling The error path while failing to register devices on the TEE bus has a bug leading to kernel panic as follows: [ 15.398930] Unable to handle kernel paging request at virtual address ffff07ed00626d7c [ 15.406913] Mem abort info: [ 15.409722] ESR = 0x0000000096000005 [ 15.413490] EC = 0x25: DABT (current EL), IL = 32 bits [ 15.418814] SET = 0, FnV = 0 [ 15.421878] EA = 0, S1PTW = 0 [ 15.425031] FSC = 0x05: level 1 translation fault [ 15.429922] Data abort info: [ 15.432813] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 15.438310] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 15.443372] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 15.448697] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000d9e3e000 [ 15.455413] [ffff07ed00626d7c] pgd=1800000bffdf9003, p4d=1800000bffdf9003, pud=0000000000000000 [ 15.464146] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Commit 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") led to the introduction of this bug. So fix it appropriately.
Reported-by: Mikko Rapeli Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218542 Fixes: 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b1092127694..1892e49a8e6a 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) -- cgit v1.2.3 From 1ca1ba465e55b9460e4e75dec9fff31e708fec74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: geneve: make sure to pull inner header in geneve_rx() syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. 
[1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto 
net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 32c51c244153..c4ed36c71897 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.3 From 72e6d668773fd19f78a6e8017347b08a5cccaaeb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Mar 2024 17:22:43 +0200 Subject: drm: Fix output poll work for drm_kms_helper_poll=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If drm_kms_helper_poll=n the output poll work will only get scheduled from drm_helper_probe_single_connector_modes() to handle a delayed hotplug event. Since polling is disabled the work in this case should just call drm_kms_helper_hotplug_event() w/o detecting the state of connectors and rescheduling the work. 
After commit d33a54e3991d after a delayed hotplug event above the connectors did get re-detected in the poll work and the work got re-scheduled periodically (since poll_running is also false if drm_kms_helper_poll=n), in effect ignoring the drm_kms_helper_poll=n kernel param. Fix the above by calling only drm_kms_helper_hotplug_event() for a delayed hotplug event if drm_kms_helper_poll=n, as was done before d33a54e3991d. Cc: Dmitry Baryshkov Reported-by: Ville Syrjälä Fixes: d33a54e3991d ("drm/probe_helper: sort out poll_running vs poll_enabled") Reviewed-by: Dmitry Baryshkov Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240301152243.1670573-1-imre.deak@intel.com --- drivers/gpu/drm/drm_probe_helper.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 3f479483d7d8..23b4e9a3361d 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -760,9 +760,11 @@ static void output_poll_execute(struct work_struct *work) changed = dev->mode_config.delayed_event; dev->mode_config.delayed_event = false; - if (!drm_kms_helper_poll && dev->mode_config.poll_running) { - drm_kms_helper_disable_hpd(dev); - dev->mode_config.poll_running = false; + if (!drm_kms_helper_poll) { + if (dev->mode_config.poll_running) { + drm_kms_helper_disable_hpd(dev); + dev->mode_config.poll_running = false; + } goto out; } -- cgit v1.2.3 From 5803b54068435be3a3254f9ecdc1ebd5c18718a8 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 23 Feb 2024 14:11:22 +0100 Subject: regulator: rk808: fix buck range on RK806 The linear ranges aren't really matching what they should be. Indeed, the range is inclusive of the min value, so it makes sense the previous range does NOT include the max step value representing the min value of the range in question.
Since 1.5V is represented by the decimal value 160, the previous range max step value should be 159 and not 160. Similarly, 3.4V is represented by the decimal value 236, so the previous range max value should be 235 and not 237. The only change in behavior this makes is that this actually modeled the ranges to map step with decimal value 237 with 3.65V instead of 3.4V (the max supported by the HW). Fixes: f991a220a447 ("regulator: rk808: add rk806 support") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Link: https://msgid.link/r/20240223-rk806-regulator-ranges-v1-1-3904ab70d250@theobroma-systems.com Signed-off-by: Mark Brown --- drivers/regulator/rk808-regulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index e374fa6e5f28..bfab5468cc54 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -1017,9 +1017,9 @@ static const struct regulator_desc rk805_reg[] = { }; static const struct linear_range rk806_buck_voltage_ranges[] = { - REGULATOR_LINEAR_RANGE(500000, 0, 160, 6250), /* 500mV ~ 1500mV */ - REGULATOR_LINEAR_RANGE(1500000, 161, 237, 25000), /* 1500mV ~ 3400mV */ - REGULATOR_LINEAR_RANGE(3400000, 238, 255, 0), + REGULATOR_LINEAR_RANGE(500000, 0, 159, 6250), /* 500mV ~ 1500mV */ + REGULATOR_LINEAR_RANGE(1500000, 160, 235, 25000), /* 1500mV ~ 3400mV */ + REGULATOR_LINEAR_RANGE(3400000, 236, 255, 0), }; static const struct linear_range rk806_ldo_voltage_ranges[] = { -- cgit v1.2.3 From 6717ff5533f332ef7294655629b8fa5fb8b132de Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 23 Feb 2024 14:11:23 +0100 Subject: regulator: rk808: fix LDO range on RK806 The linear ranges aren't really matching what they should be. Indeed, the range is inclusive of the min value, so it makes sense the previous range does NOT include the max step value representing the min value of the range in question. 
Since 3.4V is represented by the decimal value 232, the previous range max step value should be 231 and not 232. No expected change in behavior since 3.4V was mapped with step 232 from the first range but is now mapped with step 232 from the second range. While at it, remove the incorrect comment from the second range. Fixes: f991a220a447 ("regulator: rk808: add rk806 support") Cc: Quentin Schulz Signed-off-by: Quentin Schulz Link: https://msgid.link/r/20240223-rk806-regulator-ranges-v1-2-3904ab70d250@theobroma-systems.com Signed-off-by: Mark Brown --- drivers/regulator/rk808-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index bfab5468cc54..d89ae7f16d7a 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -1023,8 +1023,8 @@ static const struct linear_range rk806_buck_voltage_ranges[] = { }; static const struct linear_range rk806_ldo_voltage_ranges[] = { - REGULATOR_LINEAR_RANGE(500000, 0, 232, 12500), /* 500mV ~ 3400mV */ - REGULATOR_LINEAR_RANGE(3400000, 233, 255, 0), /* 500mV ~ 3400mV */ + REGULATOR_LINEAR_RANGE(500000, 0, 231, 12500), /* 500mV ~ 3400mV */ + REGULATOR_LINEAR_RANGE(3400000, 232, 255, 0), }; static const struct regulator_desc rk806_reg[] = { -- cgit v1.2.3 From 152694c82950a0930533dbe972b1f4fc11b95e98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:45 -0700 Subject: nvme: set max_hw_sectors unconditionally All transports set a max_hw_sectors value in the nvme_ctrl, so make the code using it unconditional and clean it up using a little helper. 
Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Reviewed-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cb13f7c79eaf..6ae9aedf7bc2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1944,19 +1944,19 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, return 0; } +static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) +{ + return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1; +} + static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, struct request_queue *q) { bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT; - if (ctrl->max_hw_sectors) { - u32 max_segments = - (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1; - - max_segments = min_not_zero(max_segments, ctrl->max_segments); - blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); - blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); - } + blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); + blk_queue_max_segments(q, min_t(u32, USHRT_MAX, + min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments))); blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); -- cgit v1.2.3 From 63dfa1004322d596417f23da43cdc43cf6298c71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:46 -0700 Subject: nvme: move NVME_QUIRK_DEALLOCATE_ZEROES out of nvme_config_discard Move the handling of the NVME_QUIRK_DEALLOCATE_ZEROES quirk out of nvme_config_discard so that it is combined with the normal write_zeroes limit handling. 
Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6ae9aedf7bc2..a6c0b2f4cf79 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1816,9 +1816,6 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, else blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); queue->limits.discard_granularity = queue_logical_block_size(queue); - - if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) - blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) @@ -2029,8 +2026,12 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(ctrl, disk, head); - blk_queue_max_write_zeroes_sectors(disk->queue, - ctrl->max_zeroes_sectors); + + if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX); + else + blk_queue_max_write_zeroes_sectors(disk->queue, + ctrl->max_zeroes_sectors); } static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) -- cgit v1.2.3 From 1b2f5d5d288080ea10b4e2ed595c0dfb11557c17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:47 -0700 Subject: nvme: remove nvme_revalidate_zones Handle setting the zone size / chunk_sectors and max_append_sectors limits together with the other ZNS limits, and just open code the call to blk_revalidate_zones in the current place. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 1 - drivers/nvme/host/zns.c | 12 ++---------- 3 files changed, 3 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a6c0b2f4cf79..2817eea07e96 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2154,7 +2154,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, blk_mq_unfreeze_queue(ns->disk->queue); if (blk_queue_is_zoned(ns->queue)) { - ret = nvme_revalidate_zones(ns); + ret = blk_revalidate_disk_zones(ns->disk, NULL); if (ret && !nvme_first_scan(ns->disk)) goto out; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4a484fc8a073..01e8bae78865 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1036,7 +1036,6 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) } #endif /* CONFIG_NVME_MULTIPATH */ -int nvme_revalidate_zones(struct nvme_ns *ns); int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 499bbb0eee8d..852261d78913 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -7,16 +7,6 @@ #include #include "nvme.h" -int nvme_revalidate_zones(struct nvme_ns *ns) -{ - struct request_queue *q = ns->queue; - - blk_queue_chunk_sectors(q, ns->head->zsze); - blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); - - return blk_revalidate_disk_zones(ns->disk, NULL); -} - static int nvme_set_max_append(struct nvme_ctrl *ctrl) { struct nvme_command c = { }; @@ -113,6 +103,8 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); disk_set_max_active_zones(ns->disk, 
le32_to_cpu(id->mar) + 1); + blk_queue_chunk_sectors(ns->queue, ns->head->zsze); + blk_queue_max_zone_append_sectors(ns->queue, ns->ctrl->max_zone_append); free_data: kfree(id); return status; -- cgit v1.2.3 From f404dd928b6667b383e684f2bd8cce507e031481 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:48 -0700 Subject: nvme: move max_integrity_segments handling out of nvme_init_integrity max_integrity_segments is just a hardware limit and doesn't need to be in nvme_init_integrity with the PI setup. Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2817eea07e96..c4a268d91796 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1724,8 +1724,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, - struct nvme_ns_head *head, u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) { struct blk_integrity integrity = { }; @@ -1773,11 +1772,9 @@ static void nvme_init_integrity(struct gendisk *disk, integrity.tuple_size = head->ms; integrity.pi_offset = head->pi_offset; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, - struct nvme_ns_head *head, u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -1954,6 +1951,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, USHRT_MAX, min_not_zero(nvme_max_drv_segments(ctrl), 
ctrl->max_segments))); + blk_queue_max_integrity_segments(q, ctrl->max_integrity_segments); blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); @@ -2017,8 +2015,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, if (head->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && (head->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, head, - ctrl->max_integrity_segments); + nvme_init_integrity(disk, head); else if (!nvme_ns_has_pi(head)) capacity = 0; } -- cgit v1.2.3 From f467b48e38a60c64d73619145247c550d8edf82f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:49 -0700 Subject: nvme: cleanup the nvme_init_integrity calling conventions Handle the no metadata support case in nvme_init_integrity as well to simplify the calling convention and prepare for future changes in the area. Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c4a268d91796..1ecddb6f5ad9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1723,11 +1723,21 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -#ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) +static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) { struct blk_integrity integrity = { }; + if (!head->ms) + return true; + + /* + * PI can always be supported as we can ask the controller to simply + * insert/strip it, which is not possible for other kinds of metadata. 
+ */ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) || + !(head->features & NVME_NS_METADATA_SUPPORTED)) + return nvme_ns_has_pi(head); + switch (head->pi_type) { case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { @@ -1772,12 +1782,8 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) integrity.tuple_size = head->ms; integrity.pi_offset = head->pi_offset; blk_integrity_register(disk, &integrity); + return true; } -#else -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) -{ -} -#endif /* CONFIG_BLK_DEV_INTEGRITY */ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, struct nvme_ns_head *head) @@ -2012,13 +2018,8 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (head->ms) { - if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - (head->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, head); - else if (!nvme_ns_has_pi(head)) - capacity = 0; - } + if (!nvme_init_integrity(disk, head)) + capacity = 0; set_capacity_and_notify(disk, capacity); -- cgit v1.2.3 From 414c62e2ce5d93bfbdc12048530075dcea02cad8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:50 -0700 Subject: nvme: move blk_integrity_unregister into nvme_init_integrity Move unregistering the existing integrity profile into the helper dealing with all the other integrity / metadata setup.
Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1ecddb6f5ad9..40fab7c47eae 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1727,6 +1727,8 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) { struct blk_integrity integrity = { }; + blk_integrity_unregister(disk); + if (!head->ms) return true; @@ -1980,8 +1982,6 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, bs = (1 << 9); } - blk_integrity_unregister(disk); - atomic_bs = phys_bs = bs; if (id->nabo == 0) { /* -- cgit v1.2.3 From 8f03cfa117e06bd2d3ba7ed8bba70a3dda310cae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:51 -0700 Subject: nvme: don't use nvme_update_disk_info for the multipath disk Currently nvme_update_ns_info_block calls nvme_update_disk_info both for the namespace attached disk, and the multipath one (if it exists). This is very different from how other stacking drivers work, and leads to a lot of complexity. Switch to setting the disk capacity and initializing the integrity profile, and let blk_stack_limits which already is called just below deal with updating the other limits. 
Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40fab7c47eae..d356f3fa2cf8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2159,7 +2159,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); - nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id); + nvme_init_integrity(ns->head->disk, ns->head); + set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, -- cgit v1.2.3 From a5b1cd61820e88d90454ad87154856a7a20aafbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:52 -0700 Subject: nvme: move a few things out of nvme_update_disk_info Move setting up the integrity profile and setting the disk capacity out of nvme_update_disk_info to get nvme_update_disk_info into a shape where it just sets queue_limits eventually. 
Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d356f3fa2cf8..63c2b581f785 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1965,12 +1965,13 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } -static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, - struct nvme_ns_head *head, struct nvme_id_ns *id) +static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); + struct gendisk *disk = ns->disk; + struct nvme_ns_head *head = ns->head; u32 bs = 1U << head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; + bool valid = true; /* * The block layer can't support LBA sizes larger than the page size @@ -1978,8 +1979,8 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, * allow block I/O. 
*/ if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) { - capacity = 0; bs = (1 << 9); + valid = false; } atomic_bs = phys_bs = bs; @@ -1992,7 +1993,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { @@ -2012,24 +2013,14 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - /* - * Register a metadata profile for PI, or the plain non-integrity NVMe - * metadata masquerading as Type 0 if supported, otherwise reject block - * I/O to namespaces with metadata except when the namespace supports - * PI, as it can strip/insert in that case. - */ - if (!nvme_init_integrity(disk, head)) - capacity = 0; - - set_capacity_and_notify(disk, capacity); - - nvme_config_discard(ctrl, disk, head); + nvme_config_discard(ns->ctrl, disk, head); - if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX); else blk_queue_max_write_zeroes_sectors(disk->queue, - ctrl->max_zeroes_sectors); + ns->ctrl->max_zeroes_sectors); + return valid; } static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) @@ -2103,6 +2094,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { struct nvme_id_ns *id; + sector_t capacity; unsigned lbaf; int ret; @@ -2121,6 +2113,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); + capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); + nvme_set_queue_limits(ns->ctrl, ns->queue); ret 
= nvme_configure_metadata(ns->ctrl, ns->head, id); @@ -2129,7 +2123,19 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); + if (!nvme_update_disk_info(ns, id)) + capacity = 0; + + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. + */ + if (!nvme_init_integrity(ns->disk, ns->head)) + capacity = 0; + + set_capacity_and_notify(ns->disk, capacity); if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); -- cgit v1.2.3 From d60c23e4552b04fda600c6a8681dfe57b3ee2bd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:53 -0700 Subject: nvme: move setting the write cache flags out of nvme_set_queue_limits nvme_set_queue_limits is used on the admin queue and all gendisks including hidden ones that don't support block I/O. The write cache setting on the other hand only makes sense for block I/O. Move the blk_queue_write_cache call to nvme_update_ns_info_block instead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 63c2b581f785..ce70bfb66242 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1954,15 +1954,12 @@ static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, struct request_queue *q) { - bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT; - blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, USHRT_MAX, min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments))); blk_queue_max_integrity_segments(q, ctrl->max_integrity_segments); blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); blk_queue_dma_alignment(q, 3); - blk_queue_write_cache(q, vwc, vwc); } static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id) @@ -2093,6 +2090,7 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { + bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; struct nvme_id_ns *id; sector_t capacity; unsigned lbaf; @@ -2154,6 +2152,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); + blk_queue_write_cache(ns->disk->queue, vwc, vwc); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); -- cgit v1.2.3 From 46e7422cda8482aa3074c9caf4c224cf2fb74d71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:54 -0700 Subject: nvme: move common logic into nvme_update_ns_info nvme_update_ns_info_generic and nvme_update_ns_info_block share a fair amount of logic related to not fully supported namespace formats and updating the 
multipath information. Move this logic into the common caller. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 84 ++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce70bfb66242..f0686a872d0e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2070,21 +2070,8 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); blk_mq_unfreeze_queue(ns->disk->queue); - if (nvme_ns_head_multipath(ns->head)) { - blk_mq_freeze_queue(ns->head->disk->queue); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - blk_stack_limits(&ns->head->disk->queue->limits, - &ns->queue->limits, 0); - ns->head->disk->flags |= GENHD_FL_HIDDEN; - blk_mq_unfreeze_queue(ns->head->disk->queue); - } - /* Hide the block-interface for these devices */ - ns->disk->flags |= GENHD_FL_HIDDEN; - set_bit(NVME_NS_READY, &ns->flags); - - return 0; + return -ENODEV; } static int nvme_update_ns_info_block(struct nvme_ns *ns, @@ -2104,7 +2091,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, /* namespace not allocated or attached */ info->is_removed = true; ret = -ENODEV; - goto error; + goto out; } blk_mq_freeze_queue(ns->disk->queue); @@ -2162,54 +2149,67 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } - if (nvme_ns_head_multipath(ns->head)) { - blk_mq_freeze_queue(ns->head->disk->queue); - nvme_init_integrity(ns->head->disk, ns->head); - set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - blk_stack_limits(&ns->head->disk->queue->limits, - &ns->queue->limits, 0); - disk_update_readahead(ns->head->disk); - blk_mq_unfreeze_queue(ns->head->disk->queue); - } - ret = 0; 
out: - /* - * If probing fails due an unsupported feature, hide the block device, - * but still allow other access. - */ - if (ret == -ENODEV) { - ns->disk->flags |= GENHD_FL_HIDDEN; - set_bit(NVME_NS_READY, &ns->flags); - ret = 0; - } - -error: kfree(id); return ret; } static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) { + bool unsupported = false; + int ret; + switch (info->ids.csi) { case NVME_CSI_ZNS: if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { dev_info(ns->ctrl->device, "block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n", info->nsid); - return nvme_update_ns_info_generic(ns, info); + ret = nvme_update_ns_info_generic(ns, info); + break; } - return nvme_update_ns_info_block(ns, info); + ret = nvme_update_ns_info_block(ns, info); + break; case NVME_CSI_NVM: - return nvme_update_ns_info_block(ns, info); + ret = nvme_update_ns_info_block(ns, info); + break; default: dev_info(ns->ctrl->device, "block device for nsid %u not supported (csi %u)\n", info->nsid, info->ids.csi); - return nvme_update_ns_info_generic(ns, info); + ret = nvme_update_ns_info_generic(ns, info); + break; } + + /* + * If probing fails due an unsupported feature, hide the block device, + * but still allow other access. 
+ */ + if (ret == -ENODEV) { + ns->disk->flags |= GENHD_FL_HIDDEN; + set_bit(NVME_NS_READY, &ns->flags); + unsupported = true; + ret = 0; + } + + if (!ret && nvme_ns_head_multipath(ns->head)) { + blk_mq_freeze_queue(ns->head->disk->queue); + if (unsupported) + ns->head->disk->flags |= GENHD_FL_HIDDEN; + else + nvme_init_integrity(ns->head->disk, ns->head); + set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); + set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); + nvme_mpath_revalidate_paths(ns); + blk_stack_limits(&ns->head->disk->queue->limits, + &ns->queue->limits, 0); + + disk_update_readahead(ns->head->disk); + blk_mq_unfreeze_queue(ns->head->disk->queue); + } + + return ret; } #ifdef CONFIG_BLK_SED_OPAL -- cgit v1.2.3 From c6fce9f12764c11ab6ff34f4cb0dec06d4d87099 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:55 -0700 Subject: nvme: split out a nvme_identify_ns_nvm helper Split the logic to query the Identify Namespace Data Structure, NVM Command Set into a separate helper. 
Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f0686a872d0e..9abcc389f0f3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1831,12 +1831,35 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) a->csi == b->csi; } +static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid, + struct nvme_id_ns_nvm **nvmp) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_CS_NS, + .identify.csi = NVME_CSI_NVM, + }; + struct nvme_id_ns_nvm *nvm; + int ret; + + nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!nvm) + return -ENOMEM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); + if (ret) + kfree(nvm); + else + *nvmp = nvm; + return ret; +} + static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id) { bool first = id->dps & NVME_NS_DPS_PI_FIRST; unsigned lbaf = nvme_lbaf_index(id->flbas); - struct nvme_command c = { }; struct nvme_id_ns_nvm *nvm; int ret = 0; u32 elbaf; @@ -1849,18 +1872,9 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, goto set_pi; } - nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); - if (!nvm) - return -ENOMEM; - - c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(head->ns_id); - c.identify.cns = NVME_ID_CNS_CS_NS; - c.identify.csi = NVME_CSI_NVM; - - ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); + ret = nvme_identify_ns_nvm(ctrl, head->ns_id, &nvm); if (ret) - goto free_data; + goto set_pi; elbaf = le32_to_cpu(nvm->elbaf[lbaf]); -- cgit v1.2.3 From e5ea00a510c61e9e809a1f13286ac802bbe724a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig 
Date: Mon, 4 Mar 2024 07:04:56 -0700 Subject: nvme: don't query identify data in configure_metadata Move reading the Identify Namespace Data Structure, NVM Command Set out of configure_metadata into the caller. This allows doing the identify call outside the frozen I/O queues, and prepares for using data from the Identify data structure for other purposes. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 49 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9abcc389f0f3..a742fa10dd30 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1855,32 +1855,26 @@ static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid, return ret; } -static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, - struct nvme_id_ns *id) +static void nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, + struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) { bool first = id->dps & NVME_NS_DPS_PI_FIRST; unsigned lbaf = nvme_lbaf_index(id->flbas); - struct nvme_id_ns_nvm *nvm; - int ret = 0; u32 elbaf; head->pi_size = 0; head->ms = le16_to_cpu(id->lbaf[lbaf].ms); - if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { + if (!nvm || !(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { head->pi_size = sizeof(struct t10_pi_tuple); head->guard_type = NVME_NVM_NS_16B_GUARD; goto set_pi; } - ret = nvme_identify_ns_nvm(ctrl, head->ns_id, &nvm); - if (ret) - goto set_pi; - elbaf = le32_to_cpu(nvm->elbaf[lbaf]); /* no support for storage tag formats right now */ if (nvme_elbaf_sts(elbaf)) - goto free_data; + goto set_pi; head->guard_type = nvme_elbaf_guard_type(elbaf); switch (head->guard_type) { @@ -1894,8 +1888,6 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, break; } -free_data: - kfree(nvm); set_pi: if (head->pi_size && head->ms >= head->pi_size) 
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; @@ -1906,22 +1898,17 @@ set_pi: head->pi_offset = 0; else head->pi_offset = head->ms - head->pi_size; - - return ret; } -static int nvme_configure_metadata(struct nvme_ctrl *ctrl, - struct nvme_ns_head *head, struct nvme_id_ns *id) +static void nvme_configure_metadata(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head, struct nvme_id_ns *id, + struct nvme_id_ns_nvm *nvm) { - int ret; - - ret = nvme_init_ms(ctrl, head, id); - if (ret) - return ret; + nvme_init_ms(ctrl, head, id, nvm); head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return 0; + return; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -1930,7 +1917,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return 0; + return; head->features |= NVME_NS_EXT_LBAS; @@ -1957,7 +1944,6 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, else head->features |= NVME_NS_METADATA_SUPPORTED; } - return 0; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2092,6 +2078,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; + struct nvme_id_ns_nvm *nvm = NULL; struct nvme_id_ns *id; sector_t capacity; unsigned lbaf; @@ -2108,6 +2095,12 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) { + ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm); + if (ret < 0) + goto out; + } + blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; @@ -2115,12 +2108,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); nvme_set_queue_limits(ns->ctrl, ns->queue); - - ret = 
nvme_configure_metadata(ns->ctrl, ns->head, id); - if (ret < 0) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); nvme_set_chunk_sectors(ns, id); if (!nvme_update_disk_info(ns, id)) capacity = 0; @@ -2165,6 +2153,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ret = 0; out: + kfree(nvm); kfree(id); return ret; } -- cgit v1.2.3 From 27cb91a3a102073473c6aaf0f7f0ba2db27dbf76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:57 -0700 Subject: nvme: cleanup nvme_configure_metadata Fold nvme_init_ms into nvme_configure_metadata after splitting up a little helper to deal with the extended LBA formats. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a742fa10dd30..2ecdde361970 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1855,26 +1855,14 @@ static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid, return ret; } -static void nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, +static void nvme_configure_pi_elbas(struct nvme_ns_head *head, struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) { - bool first = id->dps & NVME_NS_DPS_PI_FIRST; - unsigned lbaf = nvme_lbaf_index(id->flbas); - u32 elbaf; - - head->pi_size = 0; - head->ms = le16_to_cpu(id->lbaf[lbaf].ms); - if (!nvm || !(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { - head->pi_size = sizeof(struct t10_pi_tuple); - head->guard_type = NVME_NVM_NS_16B_GUARD; - goto set_pi; - } - - elbaf = le32_to_cpu(nvm->elbaf[lbaf]); + u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]); /* no support for storage tag formats right now */ if (nvme_elbaf_sts(elbaf)) - goto set_pi; + return; head->guard_type = nvme_elbaf_guard_type(elbaf); 
switch (head->guard_type) { @@ -1887,29 +1875,32 @@ static void nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, default: break; } - -set_pi: - if (head->pi_size && head->ms >= head->pi_size) - head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; - else - head->pi_type = 0; - - if (first) - head->pi_offset = 0; - else - head->pi_offset = head->ms - head->pi_size; } static void nvme_configure_metadata(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) { - nvme_init_ms(ctrl, head, id, nvm); - head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + head->pi_type = 0; + head->pi_size = 0; + head->pi_offset = 0; + head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; + if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { + nvme_configure_pi_elbas(head, id, nvm); + } else { + head->pi_size = sizeof(struct t10_pi_tuple); + head->guard_type = NVME_NVM_NS_16B_GUARD; + } + + if (head->pi_size && head->ms >= head->pi_size) + head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + if (!(id->dps & NVME_NS_DPS_PI_FIRST)) + head->pi_offset = head->ms - head->pi_size; + if (ctrl->ops->flags & NVME_F_FABRICS) { /* * The NVMe over Fabrics specification only supports metadata as -- cgit v1.2.3 From e6c9b130d68144381c097c90c517cc25e8f8924e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:58 -0700 Subject: nvme: use the atomic queue limits update API Changes the callchains that update queue_limits to build an on-stack queue_limits and update it atomically. Note that for now only the admin queue actually passes it to the queue allocation function. Doing the same for the gendisks used for the namespaces will require a little more work. 
Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 133 +++++++++++++++++++++++++---------------------- drivers/nvme/host/nvme.h | 10 +--- drivers/nvme/host/zns.c | 16 +++--- 3 files changed, 80 insertions(+), 79 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2ecdde361970..6413ce24fb4b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1787,40 +1787,27 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) return true; } -static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, - struct nvme_ns_head *head) +static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim) { - struct request_queue *queue = disk->queue; - u32 max_discard_sectors; - - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) { - max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl); - } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { - max_discard_sectors = UINT_MAX; - } else { - blk_queue_max_discard_sectors(queue, 0); - return; - } + struct nvme_ctrl *ctrl = ns->ctrl; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - /* - * If discard is already enabled, don't reset queue limits. - * - * This works around the fact that the block layer can't cope well with - * updating the hardware limits when overridden through sysfs. This is - * harmless because discard limits in NVMe are purely advisory. 
- */ - if (queue->limits.max_discard_sectors) - return; + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX)) + lim->max_hw_discard_sectors = + nvme_lba_to_sect(ns->head, ctrl->dmrsl); + else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) + lim->max_hw_discard_sectors = UINT_MAX; + else + lim->max_hw_discard_sectors = 0; + + lim->discard_granularity = lim->logical_block_size; - blk_queue_max_discard_sectors(queue, max_discard_sectors); if (ctrl->dmrl) - blk_queue_max_discard_segments(queue, ctrl->dmrl); + lim->max_discard_segments = ctrl->dmrl; else - blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - queue->limits.discard_granularity = queue_logical_block_size(queue); + lim->max_discard_segments = NVME_DSM_MAX_RANGES; } static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) @@ -1942,20 +1929,21 @@ static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1; } -static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, - struct request_queue *q) +static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl, + struct queue_limits *lim) { - blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); - blk_queue_max_segments(q, min_t(u32, USHRT_MAX, - min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments))); - blk_queue_max_integrity_segments(q, ctrl->max_integrity_segments); - blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 3); + lim->max_hw_sectors = ctrl->max_hw_sectors; + lim->max_segments = min_t(u32, USHRT_MAX, + min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments)); + lim->max_integrity_segments = ctrl->max_integrity_segments; + lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1; + lim->max_segment_size = UINT_MAX; + lim->dma_alignment = 3; } -static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id) +static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, + struct queue_limits 
*lim) { - struct gendisk *disk = ns->disk; struct nvme_ns_head *head = ns->head; u32 bs = 1U << head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; @@ -1991,23 +1979,19 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id) io_opt = bs * (1 + le16_to_cpu(id->nows)); } - blk_queue_logical_block_size(disk->queue, bs); /* * Linux filesystems assume writing a single physical block is * an atomic operation. Hence limit the physical block size to the * value of the Atomic Write Unit Power Fail parameter. */ - blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs)); - blk_queue_io_min(disk->queue, phys_bs); - blk_queue_io_opt(disk->queue, io_opt); - - nvme_config_discard(ns->ctrl, disk, head); - + lim->logical_block_size = bs; + lim->physical_block_size = min(phys_bs, atomic_bs); + lim->io_min = phys_bs; + lim->io_opt = io_opt; if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) - blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX); + lim->max_write_zeroes_sectors = UINT_MAX; else - blk_queue_max_write_zeroes_sectors(disk->queue, - ns->ctrl->max_zeroes_sectors); + lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors; return valid; } @@ -2022,7 +2006,8 @@ static inline bool nvme_first_scan(struct gendisk *disk) return !disk_live(disk); } -static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id, + struct queue_limits *lim) { struct nvme_ctrl *ctrl = ns->ctrl; u32 iob; @@ -2050,25 +2035,33 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) return; } - blk_queue_chunk_sectors(ns->queue, iob); + lim->chunk_sectors = iob; } static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct nvme_ns_info *info) { + struct queue_limits lim; + int ret; + blk_mq_freeze_queue(ns->disk->queue); - nvme_set_queue_limits(ns->ctrl, ns->queue); + lim = queue_limits_start_update(ns->disk->queue); + 
nvme_set_ctrl_limits(ns->ctrl, &lim); + ret = queue_limits_commit_update(ns->disk->queue, &lim); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); blk_mq_unfreeze_queue(ns->disk->queue); /* Hide the block-interface for these devices */ - return -ENODEV; + if (!ret) + ret = -ENODEV; + return ret; } static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; + struct queue_limits lim; struct nvme_id_ns_nvm *nvm = NULL; struct nvme_id_ns *id; sector_t capacity; @@ -2098,11 +2091,26 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); - nvme_set_queue_limits(ns->ctrl, ns->queue); + lim = queue_limits_start_update(ns->disk->queue); + nvme_set_ctrl_limits(ns->ctrl, &lim); nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); - nvme_set_chunk_sectors(ns, id); - if (!nvme_update_disk_info(ns, id)) + nvme_set_chunk_sectors(ns, id, &lim); + if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; + nvme_config_discard(ns, &lim); + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + ns->head->ids.csi == NVME_CSI_ZNS) { + ret = nvme_update_zone_info(ns, lbaf, &lim); + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } + } + ret = queue_limits_commit_update(ns->disk->queue, &lim); + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } /* * Register a metadata profile for PI, or the plain non-integrity NVMe @@ -2115,14 +2123,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, set_capacity_and_notify(ns->disk, capacity); - if (ns->head->ids.csi == NVME_CSI_ZNS) { - ret = nvme_update_zone_info(ns, lbaf); - if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } - } - /* * Only set the DEAC bit if the device guarantees that reads from * deallocated data return zeroes. 
While the DEAC bit does not @@ -3128,6 +3128,7 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct static int nvme_init_identify(struct nvme_ctrl *ctrl) { + struct queue_limits lim; struct nvme_id_ctrl *id; u32 max_hw_sectors; bool prev_apst_enabled; @@ -3194,7 +3195,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_hw_sectors = min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); - nvme_set_queue_limits(ctrl, ctrl->admin_q); + lim = queue_limits_start_update(ctrl->admin_q); + nvme_set_ctrl_limits(ctrl, &lim); + ret = queue_limits_commit_update(ctrl->admin_q, &lim); + if (ret) + goto out_free; + ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); ctrl->max_namespaces = le32_to_cpu(id->mnan); @@ -4357,6 +4363,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int cmd_size) { + struct queue_limits lim = {}; int ret; memset(set, 0, sizeof(*set)); @@ -4376,7 +4383,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; - ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); + ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); goto out_free_tagset; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 01e8bae78865..27397f8404d6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1038,8 +1038,9 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct queue_limits *lim); #ifdef CONFIG_BLK_DEV_ZONED -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf); blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct 
nvme_command *cmnd, enum nvme_zone_mgmt_action action); @@ -1050,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, { return BLK_STS_NOTSUPP; } - -static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) -{ - dev_warn(ns->ctrl->device, - "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n"); - return -EPROTONOSUPPORT; -} #endif static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 852261d78913..722384bcc765 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -35,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) return 0; } -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct queue_limits *lim) { struct nvme_effects_log *log = ns->head->effects; - struct request_queue *q = ns->queue; struct nvme_command c = { }; struct nvme_id_ns_zns *id; int status; @@ -99,12 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - disk_set_zoned(ns->disk); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); - disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); - disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1); - blk_queue_chunk_sectors(ns->queue, ns->head->zsze); - blk_queue_max_zone_append_sectors(ns->queue, ns->ctrl->max_zone_append); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); + lim->zoned = 1; + lim->max_open_zones = le32_to_cpu(id->mor) + 1; + lim->max_active_zones = le32_to_cpu(id->mar) + 1; + lim->chunk_sectors = ns->head->zsze; + lim->max_zone_append_sectors = ns->ctrl->max_zone_append; free_data: kfree(id); return status; -- cgit v1.2.3 From c5be5df7217fec219e1be063859e5d099b6a9227 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:04:59 -0700 Subject: nvme-multipath: pass queue_limits to blk_alloc_disk The multipath 
disk starts out with the stacking default limits. The one interesting part here is that blk_set_stacking_limits sets the max_zone_append_sectors to UINT_MAX, which fails the validation for non-zoned devices. With the old one call per limit scheme this was fine because no one verified this weird mismatch and it was fixed by blk_stack_limits a little later before I/O could be issued. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index dc5d0d0a82d0..5397fb428b24 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -516,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work) int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) { + struct queue_limits lim; bool vwc = false; mutex_init(&head->lock); @@ -532,7 +533,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; - head->disk = blk_alloc_disk(NULL, ctrl->numa_node); + blk_set_stacking_limits(&lim); + lim.dma_alignment = 3; + if (head->ids.csi != NVME_CSI_ZNS) + lim.max_zone_append_sectors = 0; + + head->disk = blk_alloc_disk(&lim, ctrl->numa_node); if (IS_ERR(head->disk)) return PTR_ERR(head->disk); head->disk->fops = &nvme_ns_head_ops; @@ -553,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); - /* set to a default value of 512 until the disk is validated */ - blk_queue_logical_block_size(head->disk->queue, 512); - blk_set_stacking_limits(&head->disk->queue->limits); - blk_queue_dma_alignment(head->disk->queue, 3); - /* we need to propagate up the VMC settings */ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; --
cgit v1.2.3 From f7e0a545f7311691fbcabbb85238c8e4dd1a7c01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Mar 2024 07:05:00 -0700 Subject: nvme-multipath: use atomic queue limits API for stacking limits Switch to the queue_limits_* helpers to stack the bdev limits, which also includes updating the readahead settings. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6413ce24fb4b..3f985b93a19f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2188,6 +2188,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) } if (!ret && nvme_ns_head_multipath(ns->head)) { + struct queue_limits lim; + blk_mq_freeze_queue(ns->head->disk->queue); if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; @@ -2196,10 +2198,11 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); - blk_stack_limits(&ns->head->disk->queue->limits, - &ns->queue->limits, 0); - disk_update_readahead(ns->head->disk); + lim = queue_limits_start_update(ns->head->disk->queue); + queue_limits_stack_bdev(&lim, ns->disk->part0, 0, + ns->head->disk->disk_name); + ret = queue_limits_commit_update(ns->head->disk->queue, &lim); blk_mq_unfreeze_queue(ns->head->disk->queue); } -- cgit v1.2.3 From 5f5ea0e4916874098ee818f40156fc35dba2bcf9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 21 Feb 2024 14:45:30 +0100 Subject: nvme-fabrics: typo in nvmf_parse_key() Of course we should use the key if there is no error ... 
Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 3499acbf6a82..ab5ac219b70a 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -637,7 +637,7 @@ static struct key *nvmf_parse_key(int key_id) } key = key_lookup(key_id); - if (!IS_ERR(key)) + if (IS_ERR(key)) pr_err("key id %08x not found\n", key_id); else pr_debug("Using key id %08x\n", key_id); -- cgit v1.2.3 From 9dfc46c87cdc8f5a42a71de247a744a6b8188980 Mon Sep 17 00:00:00 2001 From: Cong Yang Date: Fri, 1 Mar 2024 14:11:28 +0800 Subject: drm/panel: boe-tv101wum-nl6: Fine tune Himax83102-j02 panel HFP and HBP (again) The current measured frame rate is 59.95Hz, which does not meet the requirements of touch-stylus and stylus cannot work normally. After adjustment, the actual measurement is 60.001Hz. Now this panel looks like it's only used by me on the MTK platform, so let's change this set of parameters. 
[ dianders: Added "(again") to subject and fixed the "Fixes" line ] Fixes: cea7008190ad ("drm/panel: boe-tv101wum-nl6: Fine tune Himax83102-j02 panel HFP and HBP") Signed-off-by: Cong Yang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240301061128.3145982-1-yangcong5@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index c4c0f08e9202..4945a1e787eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1768,11 +1768,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = { }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { - .clock = 162850, + .clock = 162680, .hdisplay = 1200, - .hsync_start = 1200 + 50, - .hsync_end = 1200 + 50 + 20, - .htotal = 1200 + 50 + 20 + 50, + .hsync_start = 1200 + 60, + .hsync_end = 1200 + 60 + 20, + .htotal = 1200 + 60 + 20 + 40, .vdisplay = 1920, .vsync_start = 1920 + 116, .vsync_end = 1920 + 116 + 8, -- cgit v1.2.3 From af1e0a7d39f98c0dea1b186a76fcee7da6a5f7bc Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 4 Mar 2024 18:16:53 +0800 Subject: firmware: microchip: Fix over-requested allocation size cocci warnings: (new ones prefixed by >>) >> drivers/firmware/microchip/mpfs-auto-update.c:387:72-78: ERROR: application of sizeof to pointer drivers/firmware/microchip/mpfs-auto-update.c:170:72-78: ERROR: application of sizeof to pointer response_msg is a pointer to u32, so the size of element it points to is supposed to be a multiple of sizeof(u32), rather than sizeof(u32 *). 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403040516.CYxoWTXw-lkp@intel.com/ Signed-off-by: Dawei Li Fixes: ec5b0f1193ad ("firmware: microchip: add PolarFire SoC Auto Update support") Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 81f5f62e34fc..32394c24b37d 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -384,7 +384,8 @@ static int mpfs_auto_update_available(struct mpfs_auto_update_priv *priv) u32 *response_msg; int ret; - response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg), + response_msg = devm_kzalloc(priv->dev, + AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg), GFP_KERNEL); if (!response_msg) return -ENOMEM; -- cgit v1.2.3 From 89d72d4125e94aa3c2140fedd97ce07ba9e37674 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Mar 2024 09:06:08 +0100 Subject: net: sparx5: Fix use after free inside sparx5_del_mact_entry Based on the static analysis of the code it looks like when an entry from the MAC table was removed, the entry was still used after being freed. More precisely, the vid of the mac_entry was used after calling devm_kfree on the mac_entry. The fix consists in first using the vid of the mac_entry to delete the entry from the HW and after that to free it.
Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240301080608.3053468-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2..75868b3f548e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); -- cgit v1.2.3 From aec7d25b497ce4a8d044e9496de0aa433f7f8f06 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 4 Mar 2024 14:43:55 +0100 Subject: platform/x86: p2sb: On Goldmont only cache P2SB and SPI devfn BAR On Goldmont p2sb_bar() only ever gets called for 2 devices, the actual P2SB devfn 13,0 and the SPI controller which is part of the P2SB, devfn 13,2. But the current p2sb code tries to cache BAR0 info for all of devfn 13,0 to 13,7 . This involves calling pci_scan_single_device() for device 13 functions 0-7 and the hw does not seem to like pci_scan_single_device() getting called for some of the other hidden devices. E.g. on an ASUS VivoBook D540NV-GQ065T this leads to continuous ACPI errors leading to high CPU usage. Fix this by only caching BAR0 info and thus only calling pci_scan_single_device() for the P2SB and the SPI controller. 
Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") Reported-by: Danil Rybakov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218531 Tested-by: Danil Rybakov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240304134356.305375-2-hdegoede@redhat.com --- drivers/platform/x86/p2sb.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 6bd14d0132db..3d66e1d4eb1f 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,9 +20,11 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; @@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { - unsigned int slot, fn; - - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. 
- */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; -- cgit v1.2.3 From 0314cebb29be2f961abb37bd0b01cb16899868f2 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 26 Feb 2024 06:40:10 -0800 Subject: platform/x86/amd/pmf: Fix missing error code in amd_pmf_init_smart_pc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the error path, assign -ENOMEM to ret when memory allocation of "dev->prev_data" fails. Fixes: e70961505808 ("platform/x86/amd/pmf: Fixup error handling for amd_pmf_init_smart_pc()") Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240226144011.2100804-1-harshit.m.mogalapalli@oracle.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8527dca9cf56..dcbe8f85e122 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -458,8 +458,10 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) amd_pmf_hex_dump_pb(dev); dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) + if (!dev->prev_data) { + ret = -ENOMEM; goto error; + } ret = amd_pmf_start_policy_engine(dev); if (ret) -- cgit v1.2.3 From 
0b385be4c3ccd5636441923d7cad5eda6b4651cb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 23 Feb 2024 22:32:15 +0200 Subject: drm/i915: Don't explode when the dig port we don't have an AUX CH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The icl+ power well code currently assumes that every AUX power well maps to an encoder which is using said power well. That is by no means guaranteed as we: - only register encoders for ports declared in the VBT - combo PHY HDMI-only encoder no longer get an AUX CH since commit 9856308c94ca ("drm/i915: Only populate aux_ch if really needed") However we have places such as intel_power_domains_sanitize_state() that blindly traverse all the possible power wells. So these bits of code may very well encounter an aux power well with no associated encoder. In this particular case the BIOS seems to have left one AUX power well enabled even though we're dealing with a HDMI only encoder on a combo PHY. We then proceed to turn off said power well and explode when we can't find a matching encoder. As a short term fix we should be able to just skip the PHY related parts of the power well programming since we know this situation can only happen with combo PHYs. Another option might be to go back to always picking an AUX CH for all encoders. However I'm a bit wary about that since we might in theory end up conflicting with the VBT AUX CH assignment. Also that wouldn't help with encoders not declared in the VBT, should we ever need to poke the corresponding power wells. Longer term we need to figure out what the actual relationship is between the PHY vs. AUX CH vs. AUX power well. Currently this is entirely unclear.
Cc: stable@vger.kernel.org Fixes: 9856308c94ca ("drm/i915: Only populate aux_ch if really needed") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10184 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240223203216.15210-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 6a8c66bf0e565c34ad0a18f820e0bb17951f7f91) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display_power_well.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 47cd6bb04366..06900ff307b2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -246,7 +246,14 @@ static enum phy icl_aux_pw_to_phy(struct drm_i915_private *i915, enum aux_ch aux_ch = icl_aux_pw_to_ch(power_well); struct intel_digital_port *dig_port = aux_ch_to_digital_port(i915, aux_ch); - return intel_port_to_phy(i915, dig_port->base.port); + /* + * FIXME should we care about the (VBT defined) dig_port->aux_ch + * relationship or should this be purely defined by the hardware layout? + * Currently if the port doesn't appear in the VBT, or if it's declared + * as HDMI-only and routed to a combo PHY, the encoder either won't be + * present at all or it will not have an aux_ch assigned. + */ + return dig_port ? 
intel_port_to_phy(i915, dig_port->base.port) : PHY_NONE; } static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, @@ -414,7 +421,8 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, intel_de_rmw(dev_priv, regs->driver, 0, HSW_PWR_WELL_CTL_REQ(pw_idx)); - if (DISPLAY_VER(dev_priv) < 12) + /* FIXME this is a mess */ + if (phy != PHY_NONE) intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), 0, ICL_LANE_ENABLE_AUX); @@ -437,7 +445,10 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); - intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), ICL_LANE_ENABLE_AUX, 0); + /* FIXME this is a mess */ + if (phy != PHY_NONE) + intel_de_rmw(dev_priv, ICL_PORT_CL_DW12(phy), + ICL_LANE_ENABLE_AUX, 0); intel_de_rmw(dev_priv, regs->driver, HSW_PWR_WELL_CTL_REQ(pw_idx), 0); -- cgit v1.2.3 From 4a30dcac38c2b34f5b4f358630774bc2c2c104b0 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 23 Feb 2024 10:40:40 +0100 Subject: usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm devices On SM8550 and SM8650 Qualcomm platforms a call to UCSI_GET_PDOS for non-PD partners will cause a firmware crash with no easy way to recover from it. Add UCSI_NO_PARTNER_PDOS quirk for those platform until we find a way to properly handle the crash. 
Signed-off-by: Neil Armstrong Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240223-topic-sm8550-upstream-ucsi-no-pdos-v1-1-8900ad510944@linaro.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_glink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 53a7ede8556d..faccc942b381 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -301,6 +301,7 @@ static const struct of_device_id pmic_glink_ucsi_of_quirks[] = { { .compatible = "qcom,sc8180x-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, }, { .compatible = "qcom,sc8280xp-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, }, { .compatible = "qcom,sm8350-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, }, + { .compatible = "qcom,sm8550-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, }, {} }; -- cgit v1.2.3 From 197331b27ac890d0209232d5f669830cd00e8918 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 28 Feb 2024 00:05:12 +0000 Subject: usb: typec: tpcm: Fix PORT_RESET behavior for self powered devices While commit 69f89168b310 ("usb: typec: tpcm: Fix issues with power being removed during reset") fixes the boot issues for bus powered devices such as LibreTech Renegade Elite/Firefly, it trades off the CC pins NOT being Hi-Zed during error recovery (i.e. PORT_RESET) for devices which are NOT bus powered (a.k.a. self powered). This change Hi-Zs the CC pins only for self powered devices, thus preventing brown out for bus powered devices. Adhering to spec is gaining more importance due to the Common charger initiative enforced by the European Union. Quoting from the spec: 4.5.2.2.2.1 ErrorRecovery State Requirements The port shall not drive VBUS or VCONN, and shall present a high-impedance to ground (above zOPEN) on its CC1 and CC2 pins. Hi-Zing the CC pins is the intended behavior for PORT_RESET.
CC pins are set to default state after tErrorRecovery in PORT_RESET_WAIT_OFF. 4.5.2.2.2.2 Exiting From ErrorRecovery State A Sink shall transition to Unattached.SNK after tErrorRecovery. A Source shall transition to Unattached.SRC after tErrorRecovery. Fixes: 69f89168b310 ("usb: typec: tpcm: Fix issues with power being removed during reset") Cc: stable@vger.kernel.org Cc: Mark Brown Signed-off-by: Badhri Jagan Sridharan Tested-by: Mark Brown Link: https://lore.kernel.org/r/20240228000512.746252-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 66e532edcece..096597231027 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4873,8 +4873,11 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? - TYPEC_CC_RD : tcpm_rp_cc(port)); + if (port->self_powered) + tcpm_set_cc(port, TYPEC_CC_OPEN); + else + tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? + TYPEC_CC_RD : tcpm_rp_cc(port)); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; -- cgit v1.2.3 From 165376f6b23e9a779850e750fb2eb06622e5a531 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Thu, 29 Feb 2024 00:11:02 +0000 Subject: usb: typec: altmodes/displayport: create sysfs nodes as driver's default device attribute group The DisplayPort driver's sysfs nodes may be present to the userspace before typec_altmode_set_drvdata() completes in dp_altmode_probe. This means that a sysfs read can trigger a NULL pointer error by dereferencing dp->hpd in hpd_show or dp->lock in pin_assignment_show, as dev_get_drvdata() returns NULL in those cases. Remove manual sysfs node creation in favor of adding attribute group as default for devices bound to the driver.
The ATTRIBUTE_GROUPS() macro is not used here otherwise the path to the sysfs nodes is no longer compliant with the ABI. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Link: https://lore.kernel.org/r/20240229001101.3889432-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index f81bec0c7b86..f8ea3054be54 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -559,16 +559,21 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(hpd); -static struct attribute *dp_altmode_attrs[] = { +static struct attribute *displayport_attrs[] = { &dev_attr_configuration.attr, &dev_attr_pin_assignment.attr, &dev_attr_hpd.attr, NULL }; -static const struct attribute_group dp_altmode_group = { +static const struct attribute_group displayport_group = { .name = "displayport", - .attrs = dp_altmode_attrs, + .attrs = displayport_attrs, +}; + +static const struct attribute_group *displayport_groups[] = { + &displayport_group, + NULL, }; int dp_altmode_probe(struct typec_altmode *alt) @@ -576,7 +581,6 @@ int dp_altmode_probe(struct typec_altmode *alt) const struct typec_altmode *port = typec_altmode_get_partner(alt); struct fwnode_handle *fwnode; struct dp_altmode *dp; - int ret; /* FIXME: Port can only be DFP_U. 
*/ @@ -587,10 +591,6 @@ int dp_altmode_probe(struct typec_altmode *alt) DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) return -ENODEV; - ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); - if (ret) - return ret; - dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL); if (!dp) return -ENOMEM; @@ -624,7 +624,6 @@ void dp_altmode_remove(struct typec_altmode *alt) { struct dp_altmode *dp = typec_altmode_get_drvdata(alt); - sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group); cancel_work_sync(&dp->work); if (dp->connector_fwnode) { @@ -649,6 +648,7 @@ static struct typec_altmode_driver dp_altmode_driver = { .driver = { .name = "typec_displayport", .owner = THIS_MODULE, + .dev_groups = displayport_groups, }, }; module_typec_altmode_driver(dp_altmode_driver); -- cgit v1.2.3 From f90ce1e04cbcc76639d6cba0fdbd820cd80b3c70 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 28 Feb 2024 17:24:41 +0530 Subject: usb: gadget: ncm: Fix handling of zero block length packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While connecting to a Linux host with CDC_NCM_NTB_DEF_SIZE_TX set to 65536, it has been observed that we receive short packets, which come at interval of 5-10 seconds sometimes and have block length zero but still contain 1-2 valid datagrams present. According to the NCM spec: "If wBlockLength = 0x0000, the block is terminated by a short packet. In this case, the USB transfer must still be shorter than dwNtbInMaxSize or dwNtbOutMaxSize. If exactly dwNtbInMaxSize or dwNtbOutMaxSize bytes are sent, and the size is a multiple of wMaxPacketSize for the given pipe, then no ZLP shall be sent. wBlockLength= 0x0000 must be used with extreme care, because of the possibility that the host and device may get out of sync, and because of test issues. 
wBlockLength = 0x0000 allows the sender to reduce latency by starting to send a very large NTB, and then shortening it when the sender discovers that there’s not sufficient data to justify sending a large NTB" However, there is a potential issue with the current implementation, as it checks for the occurrence of multiple NTBs in a single giveback by verifying if the leftover bytes to be processed is zero or not. If the block length reads zero, we would process the same NTB infinitely because the leftover bytes is never zero and it leads to a crash. Fix this by bailing out if block length reads zero. Cc: stable@vger.kernel.org Fixes: 427694cfaafa ("usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240228115441.2105585-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index e2a059cfda2c..28f4e6552e84 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1346,7 +1346,7 @@ parse_ntb: if (to_process == 1 && (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { to_process--; - } else if (to_process > 0) { + } else if ((to_process > 0) && (block_len != 0)) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- cgit v1.2.3 From 69c63350e573367f9c8594162288cffa8a26d0d1 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 23 Feb 2024 01:33:43 +0200 Subject: usb: port: Don't try to peer unused USB ports based on location Unused USB ports may have bogus location data in ACPI PLD tables. This causes port peering failures as these unused USB2 and USB3 ports' locations may match.
Due to these failures the driver prints a "usb: port power management may be unreliable" warning, and unnecessarily blocks port power off during runtime suspend. This was debugged on a couple DELL systems where the unused ports all returned zeroes in their location data. Similar bugreports exist for other systems. Don't try to peer or match ports that have connect type set to USB_PORT_NOT_USED. Fixes: 3bfd659baec8 ("usb: find internal hub tier mismatch via acpi") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218465 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218486 Tested-by: Paul Menzel Link: https://lore.kernel.org/linux-usb/5406d361-f5b7-4309-b0e6-8c94408f7d75@molgen.mpg.de Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Mathias Nyman Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218490 Link: https://lore.kernel.org/r/20240222233343.71856-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index c628c1abc907..4d63496f98b6 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -573,7 +573,7 @@ static int match_location(struct usb_device *peer_hdev, void *p) struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); - if (!peer_hub) + if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); @@ -584,7 +584,8 @@ static int match_location(struct usb_device *peer_hdev, void *p) for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; - if (peer && peer->location == port_dev->location) { + if (peer && peer->connect_type != USB_PORT_NOT_USED && + peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } -- cgit v1.2.3 From 
74cb7e0355fae9641f825afa389d3fba3b617714 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Tue, 5 Mar 2024 09:57:06 +0800 Subject: tty: serial: fsl_lpuart: avoid idle preamble pending if CTS is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the remote uart device is not connected or not enabled after booting up, the CTS line is high by default. At this time, if we enable the flow control when opening the device(for example, using “stty -F /dev/ttyLP4 crtscts” command), there will be a pending idle preamble(first writing 0 and then writing 1 to UARTCTRL_TE will queue an idle preamble) that cannot be sent out, resulting in the uart port fail to close(waiting for TX empty), so the user space stty will have to wait for a long time or forever. This is an LPUART IP bug(idle preamble has higher priority than CTS), here add a workaround patch to enable TX CTS after enabling UARTCTRL_TE, so that the idle preamble does not get stuck due to CTS is deasserted. 
Fixes: 380c966c093e ("tty: serial: fsl_lpuart: add 32-bit register interface support") Cc: stable Signed-off-by: Sherry Sun Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240305015706.1050769-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 5ddf110aedbe..bbcbc91482af 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2345,9 +2345,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(&sport->port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - lpuart32_write(&sport->port, modem, UARTMODIR); - lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ + lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ + lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* re-enable the CTS if needed */ + lpuart32_write(&sport->port, modem, UARTMODIR); if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE) sport->is_cs7 = true; -- cgit v1.2.3 From 3d9319c27ceb35fa3d2c8b15508967f3fc7e5b78 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 4 Mar 2024 17:49:53 -0800 Subject: Revert "tty: serial: simplify qcom_geni_serial_send_chunk_fifo()" This reverts commit 5c7e105cd156fc9adf5294a83623d7a40c15f9b9. As identified by KASAN, the simplification done by the cleanup patch was not legal. From tracing through the code, it can be seen that we're transmitting from a 4096-byte circular buffer. We copy anywhere from 1-4 bytes from it each time. The simplification runs into trouble when we get near the end of the circular buffer. For instance, we might start out with xmit->tail = 4094 and we want to transfer 4 bytes. With the code before simplification this was no problem.
We'd read buf[4094], buf[4095], buf[0], and buf[1]. With the new code we'll do a memcpy(&buf[4094], 4) which reads 2 bytes past the end of the buffer and then skips transmitting what's at buf[0] and buf[1]. KASAN isn't 100% consistent at reporting this for me, but to be extra confident in the analysis, I added traces of the tail and tx_bytes and then wrote a test program: while true; do echo -n "abcdefghijklmnopqrstuvwxyz0" > /dev/ttyMSM0 sleep .1 done I watched the traces over SSH and saw: qcom_geni_serial_send_chunk_fifo: 4093 4 qcom_geni_serial_send_chunk_fifo: 1 3 Which indicated that one byte should be missing. Sure enough the output that should have been: abcdefghijklmnopqrstuvwxyz0 In one case was actually missing a byte: abcdefghijklmnopqrstuvwyz0 Running "ls -al" on large directories also made the missing bytes obvious since columns didn't line up. While the original code may not be the most elegant, we're only talking about copying up to 4 bytes here. Let's just go back to the code that worked.
Fixes: 5c7e105cd156 ("tty: serial: simplify qcom_geni_serial_send_chunk_fifo()") Cc: stable Signed-off-by: Douglas Anderson Acked-by: Jiri Slaby Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240304174952.1.I920a314049b345efd1f69d708e7f74d2213d0b49@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index e63a8fbe63bd..99e08737f293 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -851,19 +851,21 @@ static void qcom_geni_serial_stop_tx(struct uart_port *uport) } static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport, - unsigned int remaining) + unsigned int chunk) { struct qcom_geni_serial_port *port = to_dev_port(uport); struct circ_buf *xmit = &uport->state->xmit; - unsigned int tx_bytes; + unsigned int tx_bytes, c, remaining = chunk; u8 buf[BYTES_PER_FIFO_WORD]; while (remaining) { memset(buf, 0, sizeof(buf)); tx_bytes = min(remaining, BYTES_PER_FIFO_WORD); - memcpy(buf, &xmit->buf[xmit->tail], tx_bytes); - uart_xmit_advance(uport, tx_bytes); + for (c = 0; c < tx_bytes ; c++) { + buf[c] = xmit->buf[xmit->tail]; + uart_xmit_advance(uport, 1); + } iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1); -- cgit v1.2.3 From b234c70fefa7532d34ebee104de64cc16f1b21e4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 5 Mar 2024 15:23:12 +0200 Subject: xhci: Fix failure to detect ring expansion need. Ring expansion checker may incorrectly assume a completely full ring is empty, missing the need for expansion. This is due to a special empty ring case where the dequeue ends up ahead of the enqueue pointer. This is seen when enqueued TRBs fill up exactly a segment, with enqueue then pointing to the end link TRB. 
Once those TRBs are handled the dequeue pointer will follow the link TRB and end up pointing to the first entry on the next segment, past the enqueue. This same enqueue - dequeue condition can be true if a ring is full, with enqueue ending on that last link TRB before the dequeue pointer on the next segment. This can be seen when queuing several ~510 small URBs via usbfs in one go before a single one is handled (i.e. dequeue not moved from first entry in segment). Expand the ring already when enqueue reaches the link TRB before the dequeue segment, instead of expanding it when enqueue moves into the dequeue segment. Reported-by: Chris Yokum Closes: https://lore.kernel.org/all/949223224.833962.1709339266739.JavaMail.zimbra@totalphase.com Tested-by: Chris Yokum Fixes: f5af638f0609 ("xhci: Fix transfer ring expansion size calculation") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240305132312.955171-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f0d8a607ff21..4f64b814d4aa 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -326,7 +326,13 @@ static unsigned int xhci_ring_expansion_needed(struct xhci_hcd *xhci, struct xhc /* how many trbs will be queued past the enqueue segment? */ trbs_past_seg = enq_used + num_trbs - (TRBS_PER_SEGMENT - 1); - if (trbs_past_seg <= 0) + /* + * Consider expanding the ring already if num_trbs fills the current + * segment (i.e. trbs_past_seg == 0), not only when num_trbs goes into + * the next segment. 
Avoids confusing full ring with special empty ring + * case below + */ + if (trbs_past_seg < 0) return 0; /* Empty ring special case, enqueue stuck on link trb while dequeue advanced */ -- cgit v1.2.3 From daf8739c3322a762ce84f240f50e0c39181a41ab Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 5 Mar 2024 14:38:52 +0100 Subject: drm/nouveau: fix stale locked mutex in nouveau_gem_ioctl_pushbuf If VM_BIND is enabled on the client the legacy submission ioctl can't be used, however if a client tries to do so regardless it will return an error. In this case the client's mutex remained locked, leading to a deadlock inside nouveau_drm_postclose or any other nouveau ioctl call. Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI") Cc: Danilo Krummrich Cc: # v6.6+ Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Reviewed-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240305133853.2214268-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 49c2bcbef129..5a887d67dc0e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, return -ENOMEM; if (unlikely(nouveau_cli_uvmm(cli))) - return -ENOSYS; + return nouveau_abi16_put(abi16, -ENOSYS); list_for_each_entry(temp, &abi16->channels, head) { if (temp->chan->chid == req->channel) { -- cgit v1.2.3 From cfa9ba1ae0bef0681833a22d326174fe633caab5 Mon Sep 17 00:00:00 2001 From: Frej Drejhammar Date: Sun, 11 Feb 2024 18:58:22 +0100 Subject: comedi: comedi_8255: Correct error in subdevice initialization The refactoring done in commit 5c57b1ccecc7 ("comedi: comedi_8255: Rework subdevice initialization functions") to the initialization of the io field of struct subdev_8255_private broke all cards
using the drivers/comedi/drivers/comedi_8255.c module. Prior to 5c57b1ccecc7, __subdev_8255_init() initialized the io field in the newly allocated struct subdev_8255_private to the non-NULL callback given to the function, otherwise it used a flag parameter to select between subdev_8255_mmio and subdev_8255_io. The refactoring removed that logic and the flag, as subdev_8255_mm_init() and subdev_8255_io_init() now explicitly pass subdev_8255_mmio and subdev_8255_io respectively to __subdev_8255_init(), only __subdev_8255_init() never sets spriv->io to the supplied callback. That spriv->io is NULL leads to a later BUG: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP PTI CPU: 1 PID: 1210 Comm: systemd-udevd Not tainted 6.7.3-x86_64 #1 Hardware name: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX RIP: 0010:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0018:ffffa3f1c02d7b78 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff91f847aefd00 RCX: 000000000000009b RDX: 0000000000000003 RSI: 0000000000000001 RDI: ffff91f840f6fc00 RBP: ffff91f840f6fc00 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000005f R12: 0000000000000000 R13: 0000000000000000 R14: ffffffffc0102498 R15: ffff91f847ce6ba8 FS: 00007f72f4e8f500(0000) GS:ffff91f8d5c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000010540e000 CR4: 00000000000406f0 Call Trace: ? __die_body+0x15/0x57 ? page_fault_oops+0x2ef/0x33c ? insert_vmap_area.constprop.0+0xb6/0xd5 ? alloc_vmap_area+0x529/0x5ee ? exc_page_fault+0x15a/0x489 ? asm_exc_page_fault+0x22/0x30 __subdev_8255_init+0x79/0x8d [comedi_8255] pci_8255_auto_attach+0x11a/0x139 [8255_pci] comedi_auto_config+0xac/0x117 [comedi] ? 
__pfx___driver_attach+0x10/0x10 pci_device_probe+0x88/0xf9 really_probe+0x101/0x248 __driver_probe_device+0xbb/0xed driver_probe_device+0x1a/0x72 __driver_attach+0xd4/0xed bus_for_each_dev+0x76/0xb8 bus_add_driver+0xbe/0x1be driver_register+0x9a/0xd8 comedi_pci_driver_register+0x28/0x48 [comedi_pci] ? __pfx_pci_8255_driver_init+0x10/0x10 [8255_pci] do_one_initcall+0x72/0x183 do_init_module+0x5b/0x1e8 init_module_from_file+0x86/0xac __do_sys_finit_module+0x151/0x218 do_syscall_64+0x72/0xdb entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7f72f50a0cb9 Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 47 71 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffd47e512d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 0000562dd06ae070 RCX: 00007f72f50a0cb9 RDX: 0000000000000000 RSI: 00007f72f52d32df RDI: 000000000000000e RBP: 0000000000000000 R08: 00007f72f5168b20 R09: 0000000000000000 R10: 0000000000000050 R11: 0000000000000246 R12: 00007f72f52d32df R13: 0000000000020000 R14: 0000562dd06785c0 R15: 0000562dcfd0e9a8 Modules linked in: 8255_pci(+) comedi_8255 comedi_pci comedi intel_gtt e100(+) acpi_cpufreq rtc_cmos usbhid CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. 
RSP: 0018:ffffa3f1c02d7b78 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff91f847aefd00 RCX: 000000000000009b RDX: 0000000000000003 RSI: 0000000000000001 RDI: ffff91f840f6fc00 RBP: ffff91f840f6fc00 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000005f R12: 0000000000000000 R13: 0000000000000000 R14: ffffffffc0102498 R15: ffff91f847ce6ba8 FS: 00007f72f4e8f500(0000) GS:ffff91f8d5c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000010540e000 CR4: 00000000000406f0 This patch simply corrects the above mistake by initializing spriv->io to the given io callback. Fixes: 5c57b1ccecc7 ("comedi: comedi_8255: Rework subdevice initialization functions") Signed-off-by: Frej Drejhammar Cc: stable@vger.kernel.org Acked-by: Ian Abbott Reviewed-by: Ian Abbott Link: https://lore.kernel.org/r/20240211175822.1357-1-frej.drejhammar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_8255.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/comedi/drivers/comedi_8255.c b/drivers/comedi/drivers/comedi_8255.c index e4974b508328..a933ef53845a 100644 --- a/drivers/comedi/drivers/comedi_8255.c +++ b/drivers/comedi/drivers/comedi_8255.c @@ -159,6 +159,7 @@ static int __subdev_8255_init(struct comedi_device *dev, return -ENOMEM; spriv->context = context; + spriv->io = io; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; -- cgit v1.2.3 From f53641a6e849034a44bf80f50245a75d7a376025 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 14 Feb 2024 10:07:25 +0000 Subject: comedi: comedi_test: Prevent timers rescheduling during deletion The comedi_test devices have a couple of timers (ai_timer and ao_timer) that can be started to simulate hardware interrupts. Their expiry functions normally reschedule the timer. 
The driver code calls either del_timer_sync() or del_timer() to delete the timers from the queue, but does not currently prevent the timers from rescheduling themselves so synchronized deletion may be ineffective. Add a couple of boolean members (one for each timer: ai_timer_enable and ao_timer_enable) to the device private data structure to indicate whether the timers are allowed to reschedule themselves. Set the member to true when adding the timer to the queue, and to false when deleting the timer from the queue in the waveform_ai_cancel() and waveform_ao_cancel() functions. The del_timer_sync() function is also called from the waveform_detach() function, but the timer enable members will already be set to false when that function is called, so no change is needed there. Fixes: 403fe7f34e33 ("staging: comedi: comedi_test: fix timer race conditions") Cc: stable@vger.kernel.org # 4.4+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20240214100747.16203-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_test.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 30ea8b53ebf8..05ae9122823f 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -87,6 +87,8 @@ struct waveform_private { struct comedi_device *dev; /* parent comedi device */ u64 ao_last_scan_time; /* time of previous AO scan in usec */ unsigned int ao_scan_period; /* AO scan period in usec */ + bool ai_timer_enable:1; /* should AI timer be running? */ + bool ao_timer_enable:1; /* should AO timer be running? 
*/ unsigned short ao_loopbacks[N_CHANS]; }; @@ -236,8 +238,12 @@ static void waveform_ai_timer(struct timer_list *t) time_increment = devpriv->ai_convert_time - now; else time_increment = 1; - mod_timer(&devpriv->ai_timer, - jiffies + usecs_to_jiffies(time_increment)); + spin_lock(&dev->spinlock); + if (devpriv->ai_timer_enable) { + mod_timer(&devpriv->ai_timer, + jiffies + usecs_to_jiffies(time_increment)); + } + spin_unlock(&dev->spinlock); } overrun: @@ -393,9 +399,12 @@ static int waveform_ai_cmd(struct comedi_device *dev, * Seem to need an extra jiffy here, otherwise timer expires slightly * early! */ + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = true; devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; add_timer(&devpriv->ai_timer); + spin_unlock_bh(&dev->spinlock); return 0; } @@ -404,6 +413,9 @@ static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; + spin_lock_bh(&dev->spinlock); + devpriv->ai_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. 
*/ del_timer(&devpriv->ai_timer); @@ -495,8 +507,12 @@ static void waveform_ao_timer(struct timer_list *t) unsigned int time_inc = devpriv->ao_last_scan_time + devpriv->ao_scan_period - now; - mod_timer(&devpriv->ao_timer, - jiffies + usecs_to_jiffies(time_inc)); + spin_lock(&dev->spinlock); + if (devpriv->ao_timer_enable) { + mod_timer(&devpriv->ao_timer, + jiffies + usecs_to_jiffies(time_inc)); + } + spin_unlock(&dev->spinlock); } underrun: @@ -517,9 +533,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, async->inttrig = NULL; devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = true; devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); add_timer(&devpriv->ao_timer); + spin_unlock_bh(&dev->spinlock); return 1; } @@ -604,6 +623,9 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; + spin_lock_bh(&dev->spinlock); + devpriv->ao_timer_enable = false; + spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ del_timer(&devpriv->ao_timer); -- cgit v1.2.3 From a283d7f179ff83976af27bcc71f7474cb4d7c348 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Sat, 24 Feb 2024 11:42:47 +0000 Subject: misc: fastrpc: Pass proper arguments to scm call For CMA memory allocation, ownership is assigned to DSP to make it accessible by the PD running on the DSP. With the current implementation, the HLOS VM is stored in the channel structure during rpmsg_probe and this VM is passed to the qcom_scm call as the source VM. The qcom_scm call will overwrite the passed source VM with the next VM, which would cause a problem in case the scm call is needed again. Add a local copy of the source VM wherever the scm call is made to avoid this problem.
Fixes: 0871561055e6 ("misc: fastrpc: Add support for audiopd") Cc: stable Signed-off-by: Ekansh Gupta Reviewed-by: Elliot Berman Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240224114247.85953-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 03319a1fa97f..dbd26c3b245b 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -263,7 +263,6 @@ struct fastrpc_channel_ctx { int domain_id; int sesscount; int vmcount; - u64 perms; struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS]; struct rpmsg_device *rpdev; struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS]; @@ -1279,9 +1278,11 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, /* Map if we have any heap VMIDs associated with this ADSP Static Process. */ if (fl->cctx->vmcount) { + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS); + err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys, (u64)fl->cctx->remote_heap->size, - &fl->cctx->perms, + &src_perms, fl->cctx->vmperms, fl->cctx->vmcount); if (err) { dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", @@ -1915,8 +1916,10 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) /* Add memory to static PD pool, protection thru hypervisor */ if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) { + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS); + err = qcom_scm_assign_mem(buf->phys, (u64)buf->size, - &fl->cctx->perms, fl->cctx->vmperms, fl->cctx->vmcount); + &src_perms, fl->cctx->vmperms, fl->cctx->vmcount); if (err) { dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", buf->phys, buf->size, err); @@ -2290,7 +2293,6 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) if (vmcount) { data->vmcount = vmcount; - data->perms = 
BIT(QCOM_SCM_VMID_HLOS); for (i = 0; i < data->vmcount; i++) { data->vmperms[i].vmid = vmids[i]; data->vmperms[i].perm = QCOM_SCM_PERM_RWX; -- cgit v1.2.3 From a0776c214d47ea4f7aaef138095beaa41cff03ef Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 20 Feb 2024 22:00:20 +0200 Subject: mei: gsc_proxy: match component when GSC is on different bus On Arrow Lake S systems, MEI is no longer strictly connected to bus 0, while graphics remain exclusively on bus 0. Adapt the component matching logic to accommodate this change: Original behavior: Required both MEI and graphics to be on the same bus 0. New behavior: Only enforces graphics to be on bus 0 (integrated), allowing MEI to reside on any bus. This ensures compatibility with Arrow Lake S and maintains functionality for the legacy systems. Fixes: 1dd924f6885b ("mei: gsc_proxy: add gsc proxy driver") Cc: stable@vger.kernel.org # v6.3+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240220200020.231192-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c index be52b113aea9..89364bdbb129 100644 --- a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c +++ b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c @@ -96,7 +96,8 @@ static const struct component_master_ops mei_component_master_ops = { * * The function checks if the device is pci device and * Intel VGA adapter, the subcomponent is SW Proxy - * and the parent of MEI PCI and the parent of VGA are the same PCH device. + * and the VGA is on the bus 0 reserved for built-in devices + * to reject discrete GFX. 
* * @dev: master device * @subcomponent: subcomponent to match (I915_COMPONENT_SWPROXY) @@ -123,7 +124,8 @@ static int mei_gsc_proxy_component_match(struct device *dev, int subcomponent, if (subcomponent != I915_COMPONENT_GSC_PROXY) return 0; - return component_compare_dev(dev->parent, ((struct device *)data)->parent); + /* Only built-in GFX */ + return (pdev->bus->number == 0); } static int mei_gsc_proxy_probe(struct mei_cl_device *cldev, @@ -146,7 +148,7 @@ static int mei_gsc_proxy_probe(struct mei_cl_device *cldev, } component_match_add_typed(&cldev->dev, &master_match, - mei_gsc_proxy_component_match, cldev->dev.parent); + mei_gsc_proxy_component_match, NULL); if (IS_ERR_OR_NULL(master_match)) { ret = -ENOMEM; goto err_exit; -- cgit v1.2.3 From ab21f3d9098b0870a385c1cb6b0753c15aa9d429 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Tue, 5 Mar 2024 10:15:56 -0300 Subject: nvme: core: constify struct class usage Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the structures nvme_class, nvme_subsys_class and nvme_ns_chr_class to be declared at build time placing them into read-only memory, instead of having to be dynamically allocated at boot time. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. 
Marliere Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 53 +++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3f985b93a19f..c4d928585ce3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock); static DEFINE_IDA(nvme_instance_ida); static dev_t nvme_ctrl_base_chr_devt; -static struct class *nvme_class; -static struct class *nvme_subsys_class; +static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env); +static const struct class nvme_class = { + .name = "nvme", + .dev_uevent = nvme_class_uevent, +}; + +static const struct class nvme_subsys_class = { + .name = "nvme-subsystem", +}; static DEFINE_IDA(nvme_ns_chr_minor_ida); static dev_t nvme_ns_chr_devt; -static struct class *nvme_ns_chr_class; +static const struct class nvme_ns_chr_class = { + .name = "nvme-generic", +}; static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, @@ -2881,7 +2890,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); - subsys->dev.class = nvme_subsys_class; + subsys->dev.class = &nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; subsys->dev.groups = nvme_subsys_attrs_groups; dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance); @@ -3435,7 +3444,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, if (minor < 0) return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); - cdev_device->class = nvme_ns_chr_class; + cdev_device->class = &nvme_ns_chr_class; cdev_device->release = nvme_cdev_rel; device_initialize(cdev_device); cdev_init(cdev, fops); @@ -4627,7 +4636,7 @@ int 
nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->device = &ctrl->ctrl_device; ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt), ctrl->instance); - ctrl->device->class = nvme_class; + ctrl->device->class = &nvme_class; ctrl->device->parent = ctrl->dev; if (ops->dev_attr_groups) ctrl->device->groups = ops->dev_attr_groups; @@ -4860,42 +4869,36 @@ static int __init nvme_core_init(void) if (result < 0) goto destroy_delete_wq; - nvme_class = class_create("nvme"); - if (IS_ERR(nvme_class)) { - result = PTR_ERR(nvme_class); + result = class_register(&nvme_class); + if (result) goto unregister_chrdev; - } - nvme_class->dev_uevent = nvme_class_uevent; - nvme_subsys_class = class_create("nvme-subsystem"); - if (IS_ERR(nvme_subsys_class)) { - result = PTR_ERR(nvme_subsys_class); + result = class_register(&nvme_subsys_class); + if (result) goto destroy_class; - } result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS, "nvme-generic"); if (result < 0) goto destroy_subsys_class; - nvme_ns_chr_class = class_create("nvme-generic"); - if (IS_ERR(nvme_ns_chr_class)) { - result = PTR_ERR(nvme_ns_chr_class); + result = class_register(&nvme_ns_chr_class); + if (result) goto unregister_generic_ns; - } + result = nvme_init_auth(); if (result) goto destroy_ns_chr; return 0; destroy_ns_chr: - class_destroy(nvme_ns_chr_class); + class_unregister(&nvme_ns_chr_class); unregister_generic_ns: unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); destroy_subsys_class: - class_destroy(nvme_subsys_class); + class_unregister(&nvme_subsys_class); destroy_class: - class_destroy(nvme_class); + class_unregister(&nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_delete_wq: @@ -4911,9 +4914,9 @@ out: static void __exit nvme_core_exit(void) { nvme_exit_auth(); - class_destroy(nvme_ns_chr_class); - class_destroy(nvme_subsys_class); - class_destroy(nvme_class); + class_unregister(&nvme_ns_chr_class); + 
class_unregister(&nvme_subsys_class); + class_unregister(&nvme_class); unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_workqueue(nvme_delete_wq); -- cgit v1.2.3 From 3c2bcfd5ac41fc379d2a7082f24d7de227b3f1e1 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Tue, 5 Mar 2024 10:15:57 -0300 Subject: nvme: fabrics: make nvmf_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the nvmf_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index ab5ac219b70a..0141c0a6942f 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1318,7 +1318,10 @@ out_free_opts: return ERR_PTR(ret); } -static struct class *nvmf_class; +static const struct class nvmf_class = { + .name = "nvme-fabrics", +}; + static struct device *nvmf_device; static DEFINE_MUTEX(nvmf_dev_mutex); @@ -1438,15 +1441,14 @@ static int __init nvmf_init(void) if (!nvmf_default_host) return -ENOMEM; - nvmf_class = class_create("nvme-fabrics"); - if (IS_ERR(nvmf_class)) { + ret = class_register(&nvmf_class); + if (ret) { pr_err("couldn't register class nvme-fabrics\n"); - ret = PTR_ERR(nvmf_class); goto out_free_host; } nvmf_device = - device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); + device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); if (IS_ERR(nvmf_device)) { pr_err("couldn't create nvme-fabrics device!\n"); ret = 
PTR_ERR(nvmf_device); @@ -1462,9 +1464,9 @@ static int __init nvmf_init(void) return 0; out_destroy_device: - device_destroy(nvmf_class, MKDEV(0, 0)); + device_destroy(&nvmf_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(nvmf_class); + class_unregister(&nvmf_class); out_free_host: nvmf_host_put(nvmf_default_host); return ret; @@ -1473,8 +1475,8 @@ out_free_host: static void __exit nvmf_exit(void) { misc_deregister(&nvmf_misc); - device_destroy(nvmf_class, MKDEV(0, 0)); - class_destroy(nvmf_class); + device_destroy(&nvmf_class, MKDEV(0, 0)); + class_unregister(&nvmf_class); nvmf_host_put(nvmf_default_host); BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); -- cgit v1.2.3 From 800bb2b02fb81cc408adf7108d91087ad33d5aa9 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Tue, 5 Mar 2024 10:15:58 -0300 Subject: nvme: fcloop: make fcloop_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the fcloop_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. 
Marliere Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fcloop.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 1471af250ea6..913cd2ec7a6f 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1556,7 +1556,9 @@ static const struct attribute_group *fcloop_dev_attr_groups[] = { NULL, }; -static struct class *fcloop_class; +static const struct class fcloop_class = { + .name = "fcloop", +}; static struct device *fcloop_device; @@ -1564,15 +1566,14 @@ static int __init fcloop_init(void) { int ret; - fcloop_class = class_create("fcloop"); - if (IS_ERR(fcloop_class)) { + ret = class_register(&fcloop_class); + if (ret) { pr_err("couldn't register class fcloop\n"); - ret = PTR_ERR(fcloop_class); return ret; } fcloop_device = device_create_with_groups( - fcloop_class, NULL, MKDEV(0, 0), NULL, + &fcloop_class, NULL, MKDEV(0, 0), NULL, fcloop_dev_attr_groups, "ctl"); if (IS_ERR(fcloop_device)) { pr_err("couldn't create ctl device!\n"); @@ -1585,7 +1586,7 @@ static int __init fcloop_init(void) return 0; out_destroy_class: - class_destroy(fcloop_class); + class_unregister(&fcloop_class); return ret; } @@ -1643,8 +1644,8 @@ static void __exit fcloop_exit(void) put_device(fcloop_device); - device_destroy(fcloop_class, MKDEV(0, 0)); - class_destroy(fcloop_class); + device_destroy(&fcloop_class, MKDEV(0, 0)); + class_unregister(&fcloop_class); } module_init(fcloop_init); -- cgit v1.2.3 From 95bf25bb9ed5dedb7fb39f76489f7d6843ab0475 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 27 Feb 2024 14:19:29 -0800 Subject: drm/udl: Add ARGB8888 as a format Even though the UDL driver converts to RGB565 internally (see pixel32_to_be16() in udl_transfer.c), it advertises XRGB8888 for compatibility. Let's add ARGB8888 to that list. 
This makes UDL devices work on ChromeOS again after commit c91acda3a380 ("drm/gem: Check for valid formats"). Prior to that commit things were "working" because we'd silently treat the ARGB8888 that ChromeOS wanted as XRGB8888. Fixes: c91acda3a380 ("drm/gem: Check for valid formats") Reviewed-by: Dmitry Baryshkov Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240227141928.1.I24ac8d51544e4624b7e9d438d95880c4283e611b@changeid --- drivers/gpu/drm/udl/udl_modeset.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 7702359c90c2..0f8d3678770e 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -253,6 +253,7 @@ static int udl_handle_damage(struct drm_framebuffer *fb, static const uint32_t udl_primary_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, }; static const uint64_t udl_primary_plane_fmtmods[] = { -- cgit v1.2.3 From 7105e92c60c9cc4112c782d69c172e96b69a43dc Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Tue, 5 Mar 2024 08:49:21 +0100 Subject: Revert "Input: bcm5974 - check endpoint type before starting traffic" This patch intended to fix a well-known issue in old drivers where the endpoint type is taken for granted, which is often triggered by fuzzers. That was the case for this driver [1], and although the fix seems to be correct, it uncovered another issue that leads to a regression [2], if the endpoints of the current interface are checked. The driver makes use of endpoints that belong to a different interface rather than the one it binds (it binds to the third interface, but also accesses an endpoint from a different one). The driver should claim the interfaces it requires, but that is still not the case.
Given that the regression is more severe than the issue found by syzkaller, the best approach is reverting the patch that causes the regression, and trying to fix the underlying problem before checking the endpoint types again. Note that reverting this patch will probably trigger the syzkaller bug at some point. This reverts commit 2b9c3eb32a699acdd4784d6b93743271b4970899. Link: https://syzkaller.appspot.com/bug?extid=348331f63b034f89b622 [1] Link: https://lore.kernel.org/linux-input/87sf161jjc.wl-tiwai@suse.de/ [2] Fixes: 2b9c3eb32a69 ("Input: bcm5974 - check endpoint type before starting traffic") Reported-by: Jacopo Radice Closes: https://bugzilla.suse.com/show_bug.cgi?id=1220030 Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240305-revert_bcm5974_ep_check-v3-1-527198cf6499@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'drivers') diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 953992b458e9..ca150618d32f 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -19,7 +19,6 @@ * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) */ -#include "linux/usb.h" #include #include #include @@ -194,8 +193,6 @@ enum tp_type { /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 -/* maximum number of supported endpoints (currently trackpad and button) */ -#define MAX_ENDPOINTS 2 /* trackpad finger data block size */ #define FSIZE_TYPE1 (14 * sizeof(__le16)) @@ -894,18 +891,6 @@ static int bcm5974_resume(struct usb_interface *iface) return error; } -static bool bcm5974_check_endpoints(struct usb_interface *iface, - const struct bcm5974_config *cfg) -{ - u8 ep_addr[MAX_ENDPOINTS + 1] = {0}; - - ep_addr[0] = cfg->tp_ep; - if (cfg->tp_type == TYPE1) - ep_addr[1] = cfg->bt_ep; - - return usb_check_int_endpoints(iface, ep_addr); -} - static int bcm5974_probe(struct usb_interface *iface, 
const struct usb_device_id *id) { @@ -918,11 +903,6 @@ static int bcm5974_probe(struct usb_interface *iface, /* find the product index */ cfg = bcm5974_get_config(udev); - if (!bcm5974_check_endpoints(iface, cfg)) { - dev_err(&iface->dev, "Unexpected non-int endpoint\n"); - return -ENODEV; - } - /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL); input_dev = input_allocate_device(); -- cgit v1.2.3 From 963465a33141d0d52338e77f80fe543d2c9dc053 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 5 Mar 2024 11:10:42 +0100 Subject: Input: gpio_keys_polled - suppress deferred probe error for gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a PC Engines APU our admins are faced with: $ dmesg | grep -c "gpio-keys-polled gpio-keys-polled: unable to claim gpio 0, err=-517" 261 Such a message always appears when e.g. a new USB device is plugged in. Suppress this message which considerably clutters the kernel log for EPROBE_DEFER (i.e. -517). Signed-off-by: Uwe Kleine-König Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240305101042.10953-2-u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys_polled.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index ba00ecfbd343..b41fd1240f43 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -315,12 +315,10 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) error = devm_gpio_request_one(dev, button->gpio, flags, button->desc ? 
: DRV_NAME); - if (error) { - dev_err(dev, - "unable to claim gpio %u, err=%d\n", - button->gpio, error); - return error; - } + if (error) + return dev_err_probe(dev, error, + "unable to claim gpio %u\n", + button->gpio); bdata->gpiod = gpio_to_desc(button->gpio); if (!bdata->gpiod) { -- cgit v1.2.3 From 330068589389ccae3452db15ecacc3e147ac9c1c Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 7 Feb 2024 16:42:43 -0800 Subject: idpf: disable local BH when scheduling napi for marker packets Fix softirq's not being handled during napi_schedule() call when receiving marker packets for queue disable by disabling local bottom half. The issue can be seen on ifdown: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!! Using ftrace to catch the failing scenario: ifconfig [003] d.... 22739.830624: softirq_raise: vec=3 [action=NET_RX] -0 [003] ..s.. 22739.831357: softirq_entry: vec=3 [action=NET_RX] No interrupt and CPU is idle. After the patch when disabling local BH before calling napi_schedule: ifconfig [003] d.... 22993.928336: softirq_raise: vec=3 [action=NET_RX] ifconfig [003] ..s1. 
22993.928337: softirq_entry: vec=3 [action=NET_RX] Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reviewed-by: Jesse Brandeburg Reviewed-by: Przemek Kitszel Signed-off-by: Emil Tantilov Signed-off-by: Alan Brady Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d0cdd63b3d5b..390977a76de2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -2087,8 +2087,10 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); /* schedule the napi to receive all the marker packets */ + local_bh_disable(); for (i = 0; i < vport->num_q_vectors; i++) napi_schedule(&vport->q_vectors[i].napi); + local_bh_enable(); return idpf_wait_for_marker_event(vport); } -- cgit v1.2.3 From 2652b99e43403dc464f3648483ffb38e48872fe4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jan 2024 13:51:58 -0800 Subject: ice: virtchnl: stop pretending to support RSS over AQ or registers The E800 series hardware uses the same iAVF driver as older devices, including the virtchnl negotiation scheme. This negotiation scheme includes a mechanism to determine what type of RSS should be supported, including RSS over PF virtchnl messages, RSS over firmware AdminQ messages, and RSS via direct register access. The PF driver will always prefer VIRTCHNL_VF_OFFLOAD_RSS_PF if its supported by the VF driver. However, if an older VF driver is loaded, it may request only VIRTCHNL_VF_OFFLOAD_RSS_REG or VIRTCHNL_VF_OFFLOAD_RSS_AQ. The ice driver happily agrees to support these methods. Unfortunately, the underlying hardware does not support these mechanisms. The E800 series VFs don't have the appropriate registers for RSS_REG. 
The mailbox queue used by VFs for VF to PF communication blocks messages which do not have the VF-to-PF opcode. Stop lying to the VF that it could support RSS over AdminQ or registers, as these interfaces do not work when the hardware is operating on an E800 series device. In practice this is unlikely to be hit by any normal user. The iAVF driver has supported RSS over PF virtchnl commands since 2016, and always defaults to using RSS_PF if possible. In principle, nothing actually stops the existing VF from attempting to access the registers or send an AQ command. However a properly coded VF will check the capability flags and will report a more useful error if it detects a case where the driver does not support the RSS offloads that it does. Fixes: 1071a8358a28 ("ice: Implement virtchnl commands for AVF support") Signed-off-by: Jacob Keller Reviewed-by: Alan Brady Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 +-------- drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index c925813ec9ca..6f2328a049bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & 
VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5e19d48a05b4..d796dbd2a440 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING -- cgit v1.2.3 From 06e456a05d669ca30b224b8ed962421770c1496c Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. 
Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 59c7e37f175f..df6a68ab747e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8013,6 +8013,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; -- cgit v1.2.3 From 9224fc86f1776193650a33a275cac628952f80a9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 1 Mar 2024 14:37:08 +0100 Subject: ice: fix uninitialized dplls mutex usage The pf->dplls.lock mutex is initialized too late, after its first use. Move it to the top of ice_dpll_init. Note that the "err_exit" error path destroys the mutex. And the mutex is the last thing destroyed in ice_dpll_deinit. 
This fixes the following warning with CONFIG_DEBUG_MUTEXES: ice 0000:10:00.0: The DDP package was successfully loaded: ICE OS Default Package version 1.3.36.0 ice 0000:10:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link) ice 0000:10:00.0: PTP init successful ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 410 at kernel/locking/mutex.c:587 __mutex_lock+0x773/0xd40 Modules linked in: crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic ice(+) nvme nvme_c> CPU: 0 PID: 410 Comm: kworker/0:4 Not tainted 6.8.0-rc5+ #3 Hardware name: HPE ProLiant DL110 Gen10 Plus/ProLiant DL110 Gen10 Plus, BIOS U56 10/19/2023 Workqueue: events work_for_cpu_fn RIP: 0010:__mutex_lock+0x773/0xd40 Code: c0 0f 84 1d f9 ff ff 44 8b 35 0d 9c 69 01 45 85 f6 0f 85 0d f9 ff ff 48 c7 c6 12 a2 a9 85 48 c7 c7 12 f1 a> RSP: 0018:ff7eb1a3417a7ae0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 RDX: 0000000000000002 RSI: ffffffff85ac2bff RDI: 00000000ffffffff RBP: ff7eb1a3417a7b80 R08: 0000000000000000 R09: 00000000ffffbfff R10: ff7eb1a3417a7978 R11: ff32b80f7fd2e568 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff32b7f02c50e0d8 FS: 0000000000000000(0000) GS:ff32b80efe800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b5852cc000 CR3: 000000003c43a004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x84/0x170 ? __mutex_lock+0x773/0xd40 ? report_bug+0x1c7/0x1d0 ? prb_read_valid+0x1b/0x30 ? handle_bug+0x42/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? __mutex_lock+0x773/0xd40 ? rcu_is_watching+0x11/0x50 ? __kmalloc_node_track_caller+0x346/0x490 ? ice_dpll_lock_status_get+0x28/0x50 [ice] ? __pfx_ice_dpll_lock_status_get+0x10/0x10 [ice] ? 
ice_dpll_lock_status_get+0x28/0x50 [ice] ice_dpll_lock_status_get+0x28/0x50 [ice] dpll_device_get_one+0x14f/0x2e0 dpll_device_event_send+0x7d/0x150 dpll_device_register+0x124/0x180 ice_dpll_init_dpll+0x7b/0xd0 [ice] ice_dpll_init+0x224/0xa40 [ice] ? _dev_info+0x70/0x90 ice_load+0x468/0x690 [ice] ice_probe+0x75b/0xa10 [ice] ? _raw_spin_unlock_irqrestore+0x4f/0x80 ? process_one_work+0x1a3/0x500 local_pci_probe+0x47/0xa0 work_for_cpu_fn+0x17/0x30 process_one_work+0x20d/0x500 worker_thread+0x1df/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0x103/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 irq event stamp: 125197 hardirqs last enabled at (125197): [] finish_task_switch.isra.0+0x12d/0x3d0 hardirqs last disabled at (125196): [] __schedule+0xea4/0x19f0 softirqs last enabled at (105334): [] napi_get_frags_check+0x1a/0x60 softirqs last disabled at (105332): [] napi_get_frags_check+0x1a/0x60 ---[ end trace 0000000000000000 ]--- Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Signed-off-by: Michal Schmidt Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..fda9140c0da4 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -2120,6 +2120,7 @@ void ice_dpll_init(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; int err = 0; + mutex_init(&d->lock); err = ice_dpll_init_info(pf, cgu); if (err) goto err_exit; @@ -2132,7 +2133,6 @@ void ice_dpll_init(struct ice_pf *pf) err = ice_dpll_init_pins(pf, cgu); if (err) goto deinit_pps; - mutex_init(&d->lock); if (cgu) { err = ice_dpll_init_worker(pf); if (err) -- cgit v1.2.3 From 6c5b6ca7642f2992502a22dbd8b80927de174b67 Mon Sep 17 00:00:00 2001 From: Jesse 
Brandeburg Date: Mon, 4 Mar 2024 16:37:07 -0800 Subject: ice: fix typo in assignment Fix an obviously incorrect assignment, created with a typo or cut-n-paste error. Fixes: 5995ef88e3a8 ("ice: realloc VSI stats arrays") Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 097bf8fd6bf0..fc23dbe302b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3192,7 +3192,7 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) } } - tx_ring_stats = vsi_stat->rx_ring_stats; + tx_ring_stats = vsi_stat->tx_ring_stats; vsi_stat->tx_ring_stats = krealloc_array(vsi_stat->tx_ring_stats, req_txq, sizeof(*vsi_stat->tx_ring_stats), -- cgit v1.2.3 From 36c824ca3e4fa8d1224c2dcdeaca39d2ca86a42f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 28 Feb 2024 18:26:03 +0100 Subject: i40e: Fix firmware version comparison function Helper i40e_is_fw_ver_eq() compares incorrectly given firmware version as it returns true when the major version of running firmware is greater than the given major version, which is wrong and results in failure during getting of DCB configuration where this helper is used. Fix the check and return true only if the running FW version is exactly equal to the given version. Reproducer: 1. Load i40e driver 2.
Check dmesg output [root@host ~]# modprobe i40e [root@host ~]# dmesg | grep 'i40e.*DCB' [ 74.750642] i40e 0000:02:00.0: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.759770] i40e 0000:02:00.0: DCB init failed -5, disabled [ 74.966550] i40e 0000:02:00.1: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.975683] i40e 0000:02:00.1: DCB init failed -5, disabled Fixes: cf488e13221f ("i40e: Add other helpers to check version of running firmware and AQ API") Signed-off-by: Ivan Vecera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index af4269330581..ce1f11b8ad65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -567,8 +567,7 @@ static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) **/ static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) { - return (hw->aq.fw_maj_ver > maj || - (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); + return (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min); } #endif /* _I40E_PROTOTYPE_H_ */ -- cgit v1.2.3 From ef27f655b438bed4c83680e4f01e1cde2739854b Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Mon, 19 Feb 2024 10:08:43 +0100 Subject: igc: avoid returning frame twice in XDP_REDIRECT When a frame can not be transmitted in XDP_REDIRECT (e.g. due to a full queue), it is necessary to free it by calling xdp_return_frame_rx_napi. However, this is the responsibility of the caller of the ndo_xdp_xmit (see for example bq_xmit_all in kernel/bpf/devmap.c) and thus calling it inside igc_xdp_xmit (which is the ndo_xdp_xmit of the igc driver) as well will lead to memory corruption. 
In fact, bq_xmit_all expects that it can return all frames after the last successfully transmitted one. Therefore, break for the first not transmitted frame, but do not call xdp_return_frame_rx_napi in igc_xdp_xmit. This is equally implemented in other Intel drivers such as the igb. There are two alternatives to this that were rejected: 1. Return num_frames as all the frames would have been transmitted and release them inside igc_xdp_xmit. While it might work technically, it is not what the return value is meant to represent (i.e. the number of SUCCESSFULLY transmitted packets). 2. Rework kernel/bpf/devmap.c and all drivers to support non-consecutively dropped packets. Besides being complex, it likely has a negative performance impact without a significant gain since it is anyway unlikely that the next frame can be transmitted if the previous one was dropped. The memory corruption can be reproduced with the following script which leads to a kernel panic after a few seconds. It basically generates more traffic than a i225 NIC can transmit and pushes it via XDP_REDIRECT from a virtual interface to the physical interface where frames get dropped. 
#!/bin/bash INTERFACE=enp4s0 INTERFACE_IDX=`cat /sys/class/net/$INTERFACE/ifindex` sudo ip link add dev veth1 type veth peer name veth2 sudo ip link set up $INTERFACE sudo ip link set up veth1 sudo ip link set up veth2 cat << EOF > redirect.bpf.c SEC("prog") int redirect(struct xdp_md *ctx) { return bpf_redirect($INTERFACE_IDX, 0); } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c redirect.bpf.c -o redirect.bpf.o sudo ip link set veth2 xdp obj redirect.bpf.o cat << EOF > pass.bpf.c SEC("prog") int pass(struct xdp_md *ctx) { return XDP_PASS; } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c pass.bpf.c -o pass.bpf.o sudo ip link set $INTERFACE xdp obj pass.bpf.o cat << EOF > trafgen.cfg { /* Ethernet Header */ 0xe8, 0x6a, 0x64, 0x41, 0xbf, 0x46, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 1, 1, # IP Src - adapt as needed 10, 0, 1, 2, # IP Dest - adapt as needed const16(6666), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } EOF sudo trafgen -i trafgen.cfg -b3000MB -o veth1 --cpp Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Signed-off-by: Florian Kauer Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba8d3fe186ae..81c21a893ede 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ 
b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6487,7 +6487,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6503,16 +6503,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6520,7 +6519,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, -- cgit v1.2.3 From ba54b1a276a6b69d80649942fe5334d19851443e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 18 Feb 2024 09:42:21 +0200 Subject: intel: legacy: Partial revert of field get conversion Refactoring of the field get conversion introduced a regression in the legacy Wake On Lan from a magic packet with i219 devices. Rx address copied not correctly from MAC to PHY with FIELD_GET macro. 
Fixes: b9a452545075 ("intel: legacy: field get conversion") Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a2788fd5f8bb..19e450a5bd31 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2559,7 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - FIELD_GET(E1000_RAH_AV, mac_reg)); + (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); -- cgit v1.2.3 From 3445139e3a594be77eff48bc17eff67cf983daed Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 25 Jan 2024 00:21:31 -0800 Subject: Revert "Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"" This reverts commit bed9e27baf52a09b7ba2a3714f1e24e17ced386d. The original set [1][2] was expected to undo a suboptimal fix in [2], and replace it with a better fix [1]. However, as reported by Dan Moulding, [2] causes an issue with raid5 with journal device. Revert [2] for now to close the issue. We will follow up on another issue reported by Junxiao Bi, as [2] is expected to fix it. We believe this is a good trade-off, because the latter issue happens less frequently. In the meanwhile, we will NOT revert [1], as it contains the right logic.
[1] commit d6e035aad6c0 ("md: bypass block throttle for superblock update") [2] commit bed9e27baf52 ("Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"") Reported-by: Dan Moulding Closes: https://lore.kernel.org/linux-raid/20240123005700.9302-1-dan@danm.net/ Fixes: bed9e27baf52 ("Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"") Cc: stable@vger.kernel.org # v5.19+ Cc: Junxiao Bi Cc: Yu Kuai Signed-off-by: Song Liu Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20240125082131.788600-1-song@kernel.org --- drivers/md/raid5.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 48129de21aec..51a591c7628b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -36,6 +36,7 @@ */ #include +#include #include #include #include @@ -6764,7 +6765,18 @@ static void raid5d(struct md_thread *thread) spin_unlock_irq(&conf->device_lock); md_check_recovery(mddev); spin_lock_irq(&conf->device_lock); + + /* + * Waiting on MD_SB_CHANGE_PENDING below may deadlock + * seeing md_check_recovery() is needed to clear + * the flag when using mdmon. + */ + continue; } + + wait_event_lock_irq(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), + conf->device_lock); } pr_debug("%d stripes handled\n", handled); -- cgit v1.2.3 From 2f03d0c2cd451c7ac2f317079d4ec518f0986b55 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:22:58 +0800 Subject: md: don't clear MD_RECOVERY_FROZEN for new dm-raid until resume After commit 9dbd1aa3a81c ("dm raid: add reshaping support to the target") raid_ctr() will set MD_RECOVERY_FROZEN before md_run() and expect to keep array frozen until resume. However, md_run() will clear the flag by setting mddev->recovery to 0. Before commit 1baae052cccd ("md: Don't ignore suspended array in md_check_recovery()"), dm-raid actually relied on suspending to prevent starting new sync_thread. 
Fix this problem by keeping 'MD_RECOVERY_FROZEN' for dm-raid in md_run(). Fixes: 1baae052cccd ("md: Don't ignore suspended array in md_check_recovery()") Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-2-yukuai1@huaweicloud.com --- drivers/md/md.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 48ae2b1cb57a..0c4e00e8d485 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6062,7 +6062,10 @@ int md_run(struct mddev *mddev) pr_warn("True protection against single-disk failure might be compromised.\n"); } - mddev->recovery = 0; + /* dm-raid expect sync_thread to be frozen until resume */ + if (mddev->gendisk) + mddev->recovery = 0; + /* may be over-ridden by personality */ mddev->resync_max_sectors = mddev->dev_sectors; -- cgit v1.2.3 From 7a2347e284d7ec2f0759be4db60fa7ca937284fc Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:22:59 +0800 Subject: md: export helpers to stop sync_thread Add new helpers: void md_idle_sync_thread(struct mddev *mddev); void md_frozen_sync_thread(struct mddev *mddev); void md_unfrozen_sync_thread(struct mddev *mddev); The helpers will be used in dm-raid in later patches to fix regressions and prevent calling md_reap_sync_thread() directly. 
Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-3-yukuai1@huaweicloud.com --- drivers/md/md.c | 29 +++++++++++++++++++++++++++++ drivers/md/md.h | 3 +++ 2 files changed, 32 insertions(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0c4e00e8d485..0bf2f7a53ed8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4942,6 +4942,35 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) mddev_lock_nointr(mddev); } +void md_idle_sync_thread(struct mddev *mddev) +{ + lockdep_assert_held(&mddev->reconfig_mutex); + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev, true, true); +} +EXPORT_SYMBOL_GPL(md_idle_sync_thread); + +void md_frozen_sync_thread(struct mddev *mddev) +{ + lockdep_assert_held(&mddev->reconfig_mutex); + + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev, true, false); +} +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); + +void md_unfrozen_sync_thread(struct mddev *mddev) +{ + lockdep_assert_held(&mddev->reconfig_mutex); + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_wakeup_thread(mddev->thread); + sysfs_notify_dirent_safe(mddev->sysfs_action); +} +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); + static void idle_sync_thread(struct mddev *mddev) { mutex_lock(&mddev->sync_mutex); diff --git a/drivers/md/md.h b/drivers/md/md.h index b2076a165c10..dff2e333756d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -792,6 +792,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); extern void md_handle_request(struct mddev *mddev, struct bio *bio); extern int mddev_suspend(struct mddev *mddev, bool interruptible); extern void mddev_resume(struct mddev *mddev); +extern void md_idle_sync_thread(struct mddev *mddev); +extern void md_frozen_sync_thread(struct mddev *mddev); 
+extern void md_unfrozen_sync_thread(struct mddev *mddev); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); -- cgit v1.2.3 From 314e9af065513ff86ec9e32eaa96b9bd275cf51d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:00 +0800 Subject: md: export helper md_is_rdwr() There are no functional changes for now, prepare to fix a deadlock for dm-raid456. Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-4-yukuai1@huaweicloud.com --- drivers/md/md.c | 12 ------------ drivers/md/md.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0bf2f7a53ed8..55ecc05c17c6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -99,18 +99,6 @@ static void mddev_detach(struct mddev *mddev); static void export_rdev(struct md_rdev *rdev, struct mddev *mddev); static void md_wakeup_thread_directly(struct md_thread __rcu *thread); -enum md_ro_state { - MD_RDWR, - MD_RDONLY, - MD_AUTO_READ, - MD_MAX_STATE -}; - -static bool md_is_rdwr(struct mddev *mddev) -{ - return (mddev->ro == MD_RDWR); -} - /* * Default number of read corrections we'll attempt on an rdev * before ejecting it from the array. 
We divide the read error diff --git a/drivers/md/md.h b/drivers/md/md.h index dff2e333756d..a40e898fe57e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -569,6 +569,18 @@ enum recovery_flags { MD_RESYNCING_REMOTE, /* remote node is running resync thread */ }; +enum md_ro_state { + MD_RDWR, + MD_RDONLY, + MD_AUTO_READ, + MD_MAX_STATE +}; + +static inline bool md_is_rdwr(struct mddev *mddev) +{ + return (mddev->ro == MD_RDWR); +} + static inline int __must_check mddev_lock(struct mddev *mddev) { return mutex_lock_interruptible(&mddev->reconfig_mutex); -- cgit v1.2.3 From 503f9d43790fdd0c6e6ae2f4dd3f70b146ac4159 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:01 +0800 Subject: md: add a new helper reshape_interrupted() The helper will be used for dm-raid456 later to detect the case that reshape can't make progress. Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-5-yukuai1@huaweicloud.com --- drivers/md/md.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/md/md.h b/drivers/md/md.h index a40e898fe57e..d5721bd77f63 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -581,6 +581,25 @@ static inline bool md_is_rdwr(struct mddev *mddev) return (mddev->ro == MD_RDWR); } +static inline bool reshape_interrupted(struct mddev *mddev) +{ + /* reshape never start */ + if (mddev->reshape_position == MaxSector) + return false; + + /* interrupted */ + if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return true; + + /* running reshape will be interrupted soon. 
*/ + if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) || + test_bit(MD_RECOVERY_INTR, &mddev->recovery) || + test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + return true; + + return false; +} + static inline int __must_check mddev_lock(struct mddev *mddev) { return mutex_lock_interruptible(&mddev->reconfig_mutex); -- cgit v1.2.3 From 16c4770c75b1223998adbeb7286f9a15c65fba73 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:02 +0800 Subject: dm-raid: really frozen sync_thread during suspend 1) commit f52f5c71f3d4 ("md: fix stopping sync thread") remove MD_RECOVERY_FROZEN from __md_stop_writes() and doesn't realize that dm-raid relies on __md_stop_writes() to frozen sync_thread indirectly. Fix this problem by adding MD_RECOVERY_FROZEN in md_stop_writes(), and since stop_sync_thread() is only used for dm-raid in this case, also move stop_sync_thread() to md_stop_writes(). 2) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen, it only prevent new sync_thread to start, and it can't stop the running sync thread; In order to frozen sync_thread, after seting the flag, stop_sync_thread() should be used. 3) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use it as condition for md_stop_writes() in raid_postsuspend() doesn't look correct. Consider that reentrant stop_sync_thread() do nothing, always call md_stop_writes() in raid_postsuspend(). 4) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime, and if MD_RECOVERY_FROZEN is cleared while the array is suspended, new sync_thread can start unexpected. Fix this by disallow raid_message() to change sync_thread status during suspend. Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(), and with previous fixes, the test won't hang there anymore, however, the test will still fail and complain that ext4 is corrupted. 
And with this patch, the test won't hang due to stop_sync_thread() or fail due to ext4 is corrupted anymore. However, there is still a deadlock related to dm-raid456 that will be fixed in following patches. Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/ Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()") Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-6-yukuai1@huaweicloud.com --- drivers/md/dm-raid.c | 25 +++++++++++++++---------- drivers/md/md.c | 3 ++- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index eb009d6bb03a..e2d7a73c0f87 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3240,11 +3240,12 @@ size_check: rs->md.ro = 1; rs->md.in_sync = 1; - /* Keep array frozen until resume. */ - set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); - /* Has to be held on running the array */ mddev_suspend_and_lock_nointr(&rs->md); + + /* Keep array frozen until resume. 
*/ + md_frozen_sync_thread(&rs->md); + r = md_run(&rs->md); rs->md.in_sync = 0; /* Assume already marked dirty */ if (r) { @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; + if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) + return -EBUSY; + if (!strcasecmp(argv[0], "frozen")) set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else @@ -3796,10 +3800,11 @@ static void raid_postsuspend(struct dm_target *ti) struct raid_set *rs = ti->private; if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { - /* Writes have to be stopped before suspending to avoid deadlocks. */ - if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery)) - md_stop_writes(&rs->md); - + /* + * sync_thread must be stopped during suspend, and writes have + * to be stopped before suspending to avoid deadlocks. + */ + md_stop_writes(&rs->md); mddev_suspend(&rs->md, false); } } @@ -4012,8 +4017,6 @@ static int raid_preresume(struct dm_target *ti) } /* Check for any resize/reshape on @rs and adjust/initiate */ - /* Be prepared for mddev_resume() in raid_resume() */ - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); mddev->resync_min = mddev->recovery_cp; @@ -4055,10 +4058,12 @@ static void raid_resume(struct dm_target *ti) if (mddev->delta_disks < 0) rs_set_capacity(rs); + WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); + WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); mddev_lock_nointr(mddev); - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); mddev->ro = 0; mddev->in_sync = 0; + md_unfrozen_sync_thread(mddev); mddev_unlock_and_resume(mddev); } } diff --git a/drivers/md/md.c b/drivers/md/md.c index 55ecc05c17c6..167db7744239 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6364,7 +6364,6 @@ static void md_clean(struct mddev *mddev) static 
void __md_stop_writes(struct mddev *mddev) { - stop_sync_thread(mddev, true, false); del_timer_sync(&mddev->safemode_timer); if (mddev->pers && mddev->pers->quiesce) { @@ -6389,6 +6388,8 @@ static void __md_stop_writes(struct mddev *mddev) void md_stop_writes(struct mddev *mddev) { mddev_lock_nointr(mddev); + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev, true, false); __md_stop_writes(mddev); mddev_unlock(mddev); } -- cgit v1.2.3 From cd32b27a66db8776d8b8e82ec7d7dde97a8693b0 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:03 +0800 Subject: md/dm-raid: don't call md_reap_sync_thread() directly Currently md_reap_sync_thread() is called from raid_message() directly without holding 'reconfig_mutex', this is definitely unsafe because md_reap_sync_thread() can change many fields that is protected by 'reconfig_mutex'. However, hold 'reconfig_mutex' here is still problematic because this will cause deadlock, for example, commit 130443d60b1b ("md: refactor idle/frozen_sync_thread() to fix deadlock"). Fix this problem by using stop_sync_thread() to unregister sync_thread, like md/raid did. 
Fixes: be83651f0050 ("DM RAID: Add message/status support for changing sync action") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-7-yukuai1@huaweicloud.com --- drivers/md/dm-raid.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e2d7a73c0f87..47c4b1b6e532 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3719,6 +3719,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; + int ret = 0; if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; @@ -3726,17 +3727,24 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) return -EBUSY; - if (!strcasecmp(argv[0], "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (!strcasecmp(argv[0], "frozen")) { + ret = mddev_lock(mddev); + if (ret) + return ret; - if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_reap_sync_thread(mddev); - } - } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) + md_frozen_sync_thread(mddev); + mddev_unlock(mddev); + } else if (!strcasecmp(argv[0], "idle")) { + ret = mddev_lock(mddev); + if (ret) + return ret; + + md_idle_sync_thread(mddev); + mddev_unlock(mddev); + } + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (decipher_sync_action(mddev, mddev->recovery) != st_idle) return -EBUSY; else if (!strcasecmp(argv[0], "resync")) ; /* MD_RECOVERY_NEEDED set below */ -- cgit v1.2.3 From 5625ff8b72b0e5c13b0fc1fc1f198155af45f729 Mon Sep 17 00:00:00 2001 From: Yu 
Kuai Date: Tue, 5 Mar 2024 15:23:04 +0800 Subject: dm-raid: add a new helper prepare_suspend() in md_personality There are no functional changes for now, prepare to fix a deadlock for dm-raid456. Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-8-yukuai1@huaweicloud.com --- drivers/md/dm-raid.c | 18 ++++++++++++++++++ drivers/md/md.h | 1 + 2 files changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 47c4b1b6e532..7d48943acd57 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3803,6 +3803,23 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); } +static void raid_presuspend(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + if (!reshape_interrupted(mddev)) + return; + + /* + * For raid456, if reshape is interrupted, IO across reshape position + * will never make progress, while caller will wait for IO to be done. + * Inform raid456 to handle those IO to prevent deadlock. 
+ */ + if (mddev->pers && mddev->pers->prepare_suspend) + mddev->pers->prepare_suspend(mddev); +} + static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -4087,6 +4104,7 @@ static struct target_type raid_target = { .message = raid_message, .iterate_devices = raid_iterate_devices, .io_hints = raid_io_hints, + .presuspend = raid_presuspend, .postsuspend = raid_postsuspend, .preresume = raid_preresume, .resume = raid_resume, diff --git a/drivers/md/md.h b/drivers/md/md.h index d5721bd77f63..8041df038b5e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -659,6 +659,7 @@ struct md_personality int (*start_reshape) (struct mddev *mddev); void (*finish_reshape) (struct mddev *mddev); void (*update_reshape_pos) (struct mddev *mddev); + void (*prepare_suspend) (struct mddev *mddev); /* quiesce suspends or resumes internal processing. * 1 - stop new actions and wait for action io to complete * 0 - return to normal behaviour -- cgit v1.2.3 From 41425f96d7aa59bc865f60f5dda3d7697b555677 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:05 +0800 Subject: dm-raid456, md/raid456: fix a deadlock for dm-raid456 while io concurrent with reshape For raid456, if reshape is still in progress, then IO across reshape position will wait for reshape to make progress. 
However, for dm-raid, in the following cases reshape will never make progress, hence IO will hang: 1) the array is read-only; 2) MD_RECOVERY_WAIT is set; 3) MD_RECOVERY_FROZEN is set; After commit c467e97f079f ("md/raid6: use valid sector values to determine if an I/O should wait on the reshape") fixed the problem that IO across reshape position doesn't wait for reshape, the dm-raid test shell/lvconvert-raid-reshape.sh started to hang: [root@fedora ~]# cat /proc/979/stack [<0>] wait_woken+0x7d/0x90 [<0>] raid5_make_request+0x929/0x1d70 [raid456] [<0>] md_handle_request+0xc2/0x3b0 [md_mod] [<0>] raid_map+0x2c/0x50 [dm_raid] [<0>] __map_bio+0x251/0x380 [dm_mod] [<0>] dm_submit_bio+0x1f0/0x760 [dm_mod] [<0>] __submit_bio+0xc2/0x1c0 [<0>] submit_bio_noacct_nocheck+0x17f/0x450 [<0>] submit_bio_noacct+0x2bc/0x780 [<0>] submit_bio+0x70/0xc0 [<0>] mpage_readahead+0x169/0x1f0 [<0>] blkdev_readahead+0x18/0x30 [<0>] read_pages+0x7c/0x3b0 [<0>] page_cache_ra_unbounded+0x1ab/0x280 [<0>] force_page_cache_ra+0x9e/0x130 [<0>] page_cache_sync_ra+0x3b/0x110 [<0>] filemap_get_pages+0x143/0xa30 [<0>] filemap_read+0xdc/0x4b0 [<0>] blkdev_read_iter+0x75/0x200 [<0>] vfs_read+0x272/0x460 [<0>] ksys_read+0x7a/0x170 [<0>] __x64_sys_read+0x1c/0x30 [<0>] do_syscall_64+0xc6/0x230 [<0>] entry_SYSCALL_64_after_hwframe+0x6c/0x74 This is because reshape can't make progress. For md/raid, the problem doesn't exist because registering a new sync_thread doesn't rely on the IO to be done any more: 1) If array is read-only, it can switch to read-write by ioctl/sysfs; 2) md/raid never sets MD_RECOVERY_WAIT; 3) If MD_RECOVERY_FROZEN is set, mddev_suspend() doesn't hold 'reconfig_mutex', hence it can be cleared and reshape can continue by sysfs api 'sync_action'. However, I'm not sure yet how to avoid the problem in dm-raid.
This patch, on the one hand, makes sure that sync_thread can't be changed through raid_message() after presuspend(); on the other hand, it detects the above 3 cases before waiting for IO to be done in dm_suspend(), and lets dm-raid requeue those IO. Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-9-yukuai1@huaweicloud.com --- drivers/md/dm-raid.c | 22 ++++++++++++++++++++-- drivers/md/md.c | 24 ++++++++++++++++++++++-- drivers/md/md.h | 3 ++- drivers/md/raid5.c | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 74 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 7d48943acd57..ea45f777691c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -213,6 +213,7 @@ struct raid_dev { #define RT_FLAG_RS_IN_SYNC 6 #define RT_FLAG_RS_RESYNCING 7 #define RT_FLAG_RS_GROW 8 +#define RT_FLAG_RS_FROZEN 9 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -3340,7 +3341,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio) if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; - md_handle_request(mddev, bio); + if (unlikely(!md_handle_request(mddev, bio))) + return DM_MAPIO_REQUEUE; return DM_MAPIO_SUBMITTED; } @@ -3724,7 +3726,8 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) + if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) || + test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags)) return -EBUSY; if (!strcasecmp(argv[0], "frozen")) { @@ -3808,6 +3811,12 @@ static void raid_presuspend(struct dm_target *ti) struct raid_set *rs = ti->private; struct mddev *mddev = 
&rs->md; + /* + * From now on, disallow raid_message() to change sync_thread until + * resume, raid_postsuspend() is too late. + */ + set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); + if (!reshape_interrupted(mddev)) return; @@ -3820,6 +3829,13 @@ static void raid_presuspend(struct dm_target *ti) mddev->pers->prepare_suspend(mddev); } +static void raid_presuspend_undo(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); +} + static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -4085,6 +4101,7 @@ static void raid_resume(struct dm_target *ti) WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); mddev_lock_nointr(mddev); mddev->ro = 0; mddev->in_sync = 0; @@ -4105,6 +4122,7 @@ static struct target_type raid_target = { .iterate_devices = raid_iterate_devices, .io_hints = raid_io_hints, .presuspend = raid_presuspend, + .presuspend_undo = raid_presuspend_undo, .postsuspend = raid_postsuspend, .preresume = raid_preresume, .resume = raid_resume, diff --git a/drivers/md/md.c b/drivers/md/md.c index 167db7744239..64d544f09295 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -366,7 +366,7 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio) return true; } -void md_handle_request(struct mddev *mddev, struct bio *bio) +bool md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: if (is_suspended(mddev, bio)) { @@ -374,7 +374,7 @@ check_suspended: /* Bail out if REQ_NOWAIT is set for the bio */ if (bio->bi_opf & REQ_NOWAIT) { bio_wouldblock_error(bio); - return; + return true; } for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, @@ -390,10 +390,13 @@ check_suspended: if (!mddev->pers->make_request(mddev, bio)) { percpu_ref_put(&mddev->active_io); + if (!mddev->gendisk && mddev->pers->prepare_suspend) + return false; 
goto check_suspended; } percpu_ref_put(&mddev->active_io); + return true; } EXPORT_SYMBOL(md_handle_request); @@ -8733,6 +8736,23 @@ void md_account_bio(struct mddev *mddev, struct bio **bio) } EXPORT_SYMBOL_GPL(md_account_bio); +void md_free_cloned_bio(struct bio *bio) +{ + struct md_io_clone *md_io_clone = bio->bi_private; + struct bio *orig_bio = md_io_clone->orig_bio; + struct mddev *mddev = md_io_clone->mddev; + + if (bio->bi_status && !orig_bio->bi_status) + orig_bio->bi_status = bio->bi_status; + + if (md_io_clone->start_time) + bio_end_io_acct(orig_bio, md_io_clone->start_time); + + bio_put(bio); + percpu_ref_put(&mddev->active_io); +} +EXPORT_SYMBOL_GPL(md_free_cloned_bio); + /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes * may proceed without blocking. It is important to call this before diff --git a/drivers/md/md.h b/drivers/md/md.h index 8041df038b5e..556db28a49aa 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -793,6 +793,7 @@ extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); void md_account_bio(struct mddev *mddev, struct bio **bio); +void md_free_cloned_bio(struct bio *bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, @@ -821,7 +822,7 @@ extern void md_stop_writes(struct mddev *mddev); extern int md_rdev_init(struct md_rdev *rdev); extern void md_rdev_clear(struct md_rdev *rdev); -extern void md_handle_request(struct mddev *mddev, struct bio *bio); +extern bool md_handle_request(struct mddev *mddev, struct bio *bio); extern int mddev_suspend(struct mddev *mddev, bool interruptible); extern void mddev_resume(struct mddev *mddev); extern void md_idle_sync_thread(struct mddev *mddev); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 48129de21aec..388da55e274e 
100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -760,6 +760,7 @@ enum stripe_result { STRIPE_RETRY, STRIPE_SCHEDULE_AND_RETRY, STRIPE_FAIL, + STRIPE_WAIT_RESHAPE, }; struct stripe_request_ctx { @@ -5937,7 +5938,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, if (ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) { spin_unlock_irq(&conf->device_lock); - return STRIPE_SCHEDULE_AND_RETRY; + ret = STRIPE_SCHEDULE_AND_RETRY; + goto out; } } spin_unlock_irq(&conf->device_lock); @@ -6016,6 +6018,12 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, out_release: raid5_release_stripe(sh); +out: + if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) { + bi->bi_status = BLK_STS_RESOURCE; + ret = STRIPE_WAIT_RESHAPE; + pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress"); + } return ret; } @@ -6137,7 +6145,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) while (1) { res = make_stripe_request(mddev, conf, &ctx, logical_sector, bi); - if (res == STRIPE_FAIL) + if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE) break; if (res == STRIPE_RETRY) @@ -6175,6 +6183,11 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) if (rw == WRITE) md_write_end(mddev); + if (res == STRIPE_WAIT_RESHAPE) { + md_free_cloned_bio(bi); + return false; + } + bio_endio(bi); return true; } @@ -8925,6 +8938,18 @@ static int raid5_start(struct mddev *mddev) return r5l_start(conf->log); } +/* + * This is only used for dm-raid456, caller already frozen sync_thread, hence + * if rehsape is still in progress, io that is waiting for reshape can never be + * done now, hence wake up and handle those IO. 
+ */ +static void raid5_prepare_suspend(struct mddev *mddev) +{ + struct r5conf *conf = mddev->private; + + wake_up(&conf->wait_for_overlap); +} + static struct md_personality raid6_personality = { .name = "raid6", @@ -8948,6 +8973,7 @@ static struct md_personality raid6_personality = .quiesce = raid5_quiesce, .takeover = raid6_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static struct md_personality raid5_personality = { @@ -8972,6 +8998,7 @@ static struct md_personality raid5_personality = .quiesce = raid5_quiesce, .takeover = raid5_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static struct md_personality raid4_personality = @@ -8997,6 +9024,7 @@ static struct md_personality raid4_personality = .quiesce = raid5_quiesce, .takeover = raid4_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static int __init raid5_init(void) -- cgit v1.2.3 From 95009ae904b1e9dca8db6f649f2d7c18a6e42c75 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Mar 2024 15:23:06 +0800 Subject: dm-raid: fix lockdep waring in "pers->hot_add_disk" The lockdep assert is added by commit a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") in print_conf(). And I didn't notice that dm-raid is calling "pers->hot_add_disk" without holding 'reconfig_mutex'. "pers->hot_add_disk" read and write many fields that is protected by 'reconfig_mutex', and raid_resume() already grab the lock in other contex. Hence fix this problem by protecting "pers->host_add_disk" with the lock. 
Fixes: 9092c02d9435 ("DM RAID: Add ability to restore transiently failed devices on resume") Fixes: a448af25becf ("md/raid10: remove rcu protection to access rdev from conf") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Xiao Ni Acked-by: Mike Snitzer Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240305072306.2562024-10-yukuai1@huaweicloud.com --- drivers/md/dm-raid.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index ea45f777691c..17e9af60bbf7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -4091,7 +4091,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { -- cgit v1.2.3 From f267f262815033452195f46c43b572159262f533 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 5 Mar 2024 10:08:28 +0100 Subject: xdp, bonding: Fix feature flags when there are no slave devs anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") changed the driver from reporting everything as supported before a device was bonded into having the driver report that no XDP feature is supported until a real device is bonded as it seems to be more truthful given eventually real underlying devices decide what XDP features are supported. The change however did not take into account when all slave devices get removed from the bond device. In this case after 9b0ed890ac2a, the driver keeps reporting a feature mask of 0x77, that is, NETDEV_XDP_ACT_MASK & ~NETDEV_XDP_ACT_XSK_ZEROCOPY whereas it should have reported a feature mask of 0. 
Fix it by resetting XDP feature flags in the same way as if no XDP program is attached to the bond device. This was uncovered by the XDP bond selftest which let BPF CI fail. After adjusting the starting masks on the latter to 0 instead of NETDEV_XDP_ACT_MASK the test passes again together with this fix. Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") Signed-off-by: Daniel Borkmann Cc: Magnus Karlsson Cc: Prashant Batra Cc: Toke Høiland-Jørgensen Cc: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Message-ID: <20240305090829.17131-1-daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a11748b8d69b..cd0683bcca03 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev) ASSERT_RTNL(); - if (!bond_xdp_check(bond)) { + if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) { xdp_clear_features_flag(bond_dev); return; } -- cgit v1.2.3 From 289e922582af5b4721ba02e86bde4d9ba918158a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 17:35:32 -0800 Subject: dpll: move all dpll<>netdev helpers to dpll code Older versions of GCC really want to know the full definition of the type involved in rcu_assign_pointer(). struct dpll_pin is defined in a local header, net/core can't reach it. Move all the netdev <> dpll code into dpll, where the type is known. Otherwise we'd need multiple function calls to jump between the compilation units. This is the same problem the commit under fixes was trying to address, but with rcu_assign_pointer() not rcu_dereference(). Some of the exports are not needed, networking core can't be a module, we only need exports for the helpers used by drivers. 
Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/35a869c8-52e8-177-1d4d-e57578b99b6@linux-m68k.org/ Fixes: 640f41ed33b5 ("dpll: fix build failure due to rcu_dereference_check() on unknown type") Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240305013532.694866-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/driver-api/dpll.rst | 2 +- drivers/dpll/dpll_core.c | 25 +++++++++++++---- drivers/dpll/dpll_netlink.c | 38 ++++++++++++++++---------- drivers/net/ethernet/intel/ice/ice_dpll.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/dpll.c | 4 +-- include/linux/dpll.h | 26 +++++++++--------- include/linux/netdevice.h | 4 --- net/core/dev.c | 22 --------------- net/core/rtnetlink.c | 4 +-- 9 files changed, 64 insertions(+), 65 deletions(-) (limited to 'drivers') diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index e3d593841aa7..ea8d16600e16 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -545,7 +545,7 @@ In such scenario, dpll device input signal shall be also configurable to drive dpll with signal recovered from the PHY netdevice. This is done by exposing a pin to the netdevice - attaching pin to the netdevice itself with -``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. +``dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in nested attribute ``IFLA_DPLL_PIN``. 
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 241db366b2c7..7f686d179fc9 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,11 +42,6 @@ struct dpll_pin_registration { void *priv; }; -struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->dpll_pin); -} - struct dpll_device *dpll_device_get_by_id(int id) { if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) @@ -513,6 +508,26 @@ err_pin_prop: return ERR_PTR(ret); } +static void dpll_netdev_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + rtnl_lock(); + rcu_assign_pointer(dev->dpll_pin, dpll_pin); + rtnl_unlock(); +} + +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + dpll_netdev_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(dpll_netdev_pin_set); + +void dpll_netdev_pin_clear(struct net_device *dev) +{ + dpll_netdev_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(dpll_netdev_pin_clear); + /** * dpll_pin_get - find existing or create new dpll pin * @clock_id: clock_id of creator diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 4ca9ad16cd95..b57355e0c214 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include "dpll_core.h" #include "dpll_netlink.h" @@ -47,18 +48,6 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) return 0; } -/** - * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin - * @pin: pin pointer - * - * Return: byte size of pin handle attribute for given pin. - */ -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) -{ - return pin ? 
nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ -} -EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); - /** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle @@ -68,7 +57,7 @@ EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -76,7 +65,28 @@ int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } -EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); + +static struct dpll_pin *dpll_netdev_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + +/** + * dpll_netdev_pin_handle_size - get size of pin handle attribute of a netdev + * @dev: netdev from which to get the pin + * + * Return: byte size of pin handle attribute, or 0 if @dev has no pin. + */ +size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return dpll_netdev_pin(dev) ? 
nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} + +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev) +{ + return dpll_msg_add_pin_handle(msg, dpll_netdev_pin(dev)); +} static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..c59e972dbaae 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1597,7 +1597,7 @@ static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf) } if (WARN_ON_ONCE(!vsi || !vsi->netdev)) return; - netdev_dpll_pin_clear(vsi->netdev); + dpll_netdev_pin_clear(vsi->netdev); dpll_pin_put(rclk->pin); } @@ -1641,7 +1641,7 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin, } if (WARN_ON((!vsi || !vsi->netdev))) return -EINVAL; - netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin); + dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index 928bf24d4b12..d74a5aaf4268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -261,7 +261,7 @@ static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, { if (mdpll->tracking_netdev) return; - netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + dpll_netdev_pin_set(netdev, mdpll->dpll_pin); mdpll->tracking_netdev = netdev; } @@ -269,7 +269,7 @@ static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) { if (!mdpll->tracking_netdev) return; - netdev_dpll_pin_clear(mdpll->tracking_netdev); + dpll_netdev_pin_clear(mdpll->tracking_netdev); mdpll->tracking_netdev = NULL; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h index c60591308ae8..e37344f6a231 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -122,15 +122,24 @@ struct dpll_pin_properties { }; #if 
IS_ENABLED(CONFIG_DPLL) -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); #else -static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) { return 0; } -static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) { return 0; } @@ -169,13 +178,4 @@ int dpll_device_change_ntf(struct dpll_device *dpll); int dpll_pin_change_ntf(struct dpll_pin *pin); -#if !IS_ENABLED(CONFIG_DPLL) -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ - return NULL; -} -#else -struct dpll_pin *netdev_dpll_pin(const struct net_device *dev); -#endif - #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 735a9386fcf8..78a09af89e39 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,8 +79,6 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; -/* DPLL specific */ -struct dpll_pin; typedef u32 xdp_features_t; @@ -4042,8 +4040,6 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin 
*dpll_pin); -void netdev_dpll_pin_clear(struct net_device *dev); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 0230391c78f7..76e6438f4858 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9074,28 +9074,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) -{ -#if IS_ENABLED(CONFIG_DPLL) - rtnl_lock(); - rcu_assign_pointer(dev->dpll_pin, dpll_pin); - rtnl_unlock(); -#endif -} - -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) -{ - WARN_ON(!dpll_pin); - netdev_dpll_pin_assign(dev, dpll_pin); -} -EXPORT_SYMBOL(netdev_dpll_pin_set); - -void netdev_dpll_pin_clear(struct net_device *dev) -{ - netdev_dpll_pin_assign(dev, NULL); -} -EXPORT_SYMBOL(netdev_dpll_pin_clear); - /** * dev_change_proto_down - set carrier according to proto_down. 
* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ae86f751efc3..bd50e9fe3234 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1057,7 +1057,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) { size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ - size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + size += dpll_netdev_pin_handle_size(dev); return size; } @@ -1792,7 +1792,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb, if (!dpll_pin_nest) return -EMSGSIZE; - ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + ret = dpll_netdev_add_pin_handle(skb, dev); if (ret < 0) goto nest_cancel; -- cgit v1.2.3 From b7fb7729c94fb2d23c79ff44f7a2da089c92d81c Mon Sep 17 00:00:00 2001 From: "Tobias Jakobi (Compleo)" Date: Mon, 4 Mar 2024 16:41:35 +0100 Subject: net: dsa: microchip: fix register write order in ksz8_ind_write8() This bug was noticed while re-implementing parts of the kernel driver in userspace using spidev. The goal was to enable some of the errata workarounds that Microchip describes in their errata sheet [1]. Both the errata sheet and the regular datasheet of e.g. the KSZ8795 imply that you need to do this for indirect register accesses: - write a 16-bit value to a control register pair (this value consists of the indirect register table, and the offset inside the table) - either read or write an 8-bit value from the data storage register (indicated by REG_IND_BYTE in the kernel) The current implementation has the order swapped. It can be proven, by reading back some indirect register with known content (the EEE register modified in ksz8_handle_global_errata() is one of these), that this implementation does not work. Private discussion with Oleksij Rempel of Pengutronix has revealed that the workaround was apparently never tested on actual hardware.
[1] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/Errata/KSZ87xx-Errata-DS80000687C.pdf Signed-off-by: Tobias Jakobi (Compleo) Reviewed-by: Oleksij Rempel Fixes: 7b6e6235b664 ("net: dsa: microchip: ksz8795: handle eee specif erratum") Link: https://lore.kernel.org/r/20240304154135.161332-1-tobias.jakobi.compleo@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8795.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 61b71bcfe396..c3da97abce20 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); -- cgit v1.2.3 From bd17b7c34fadef645becde1245b9394f69f31702 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Mar 2024 08:30:46 +0300 Subject: RAS/AMD/FMPM: Fix off by one when unwinding on error Decrement the index variable i before the first iteration when freeing the remaining elements on error. Depending on where this fails it could free something from one element beyond the end of the fru_records[] array. [ bp: Massage commit message. 
] Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/6fdec71a-846b-4cd0-af69-e5f6cd12f4f6@moroto.mountain --- drivers/ras/amd/fmpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 0963c9e7b853..2f4ac9591c8f 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -817,7 +817,7 @@ static int allocate_records(void) return ret; out_free: - for (; i >= 0; i--) + while (--i >= 0) kfree(fru_records[i]); kfree(fru_records); -- cgit v1.2.3 From 26d2b757fff02bbe971abc39071e263aa0cab924 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 22 Feb 2024 12:32:40 +0100 Subject: drm/i915/selftests: Fix dependency of some timeouts on HZ Third argument of i915_request_wait() accepts a timeout value in jiffies. Most users pass either a simple HZ based expression, or a result of msecs_to_jiffies(), or MAX_SCHEDULE_TIMEOUT, or a very small number not exceeding 4 if applicable as that value. However, there is one user -- intel_selftest_wait_for_rq() -- that passes a WAIT_FOR_RESET_TIME symbol, defined as a large constant value that most probably represents a desired timeout in ms. While that usage results in the intended value of timeout on usual x86_64 kernel configurations, it is not portable across different architectures and custom kernel configs. Rename the symbol to clearly indicate intended units and convert it to jiffies before use. 
Fixes: 3a4bfa091c46 ("drm/i915/selftest: Fix workarounds selftest for GuC submission") Signed-off-by: Janusz Krzysztofik Cc: Rahul Kumar Singh Cc: John Harrison Cc: Matthew Brost Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240222113347.648945-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 6ee3f54b880c91ab2e244eb4ffd4bfed37832b25) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 2990dd4d4a0d..e14ac0ab1314 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -3,6 +3,8 @@ * Copyright © 2021 Intel Corporation */ +#include + //#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "i915_drv.h" @@ -12,7 +14,7 @@ #define REDUCED_TIMESLICE 5 #define REDUCED_PREEMPT 10 -#define WAIT_FOR_RESET_TIME 10000 +#define WAIT_FOR_RESET_TIME_MS 10000 struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt) { @@ -91,7 +93,7 @@ int intel_selftest_wait_for_rq(struct i915_request *rq) { long ret; - ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); + ret = i915_request_wait(rq, 0, msecs_to_jiffies(WAIT_FOR_RESET_TIME_MS)); if (ret < 0) return ret; -- cgit v1.2.3 From 0848814aa296ca13e4f03848f35d2d29fc7fc30c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Feb 2024 15:26:31 +0200 Subject: drm/i915/dp: Fix connector DSC HW state readout The DSC HW state of DP connectors is read out during driver loading and system resume in intel_modeset_update_connector_atomic_state(). This function is called for all connectors though and so the state of DSI connectors will also get updated incorrectly, triggering a WARN there wrt. 
the DSC decompression AUX device. Fix the above by moving the DSC state readout to a new DP connector specific sync_state() hook. This is anyway the logical place to update the connector object's state vs. the connector's atomic state. Fixes: b2608c6b3212 ("drm/i915/dp_mst: Enable MST DSC decompression for all streams") Reported-and-tested-by: Drew Davenport Closes: https://lore.kernel.org/all/Zb0q8IDVXS0HxJyj@chromium.org Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240205132631.1588577-1-imre.deak@intel.com (cherry picked from commit a62e145981500996ea76af3d740ce0c0d74c5be0) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display_types.h | 7 +++++++ drivers/gpu/drm/i915/display/intel_dp.c | 13 +++++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 2 ++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + drivers/gpu/drm/i915/display/intel_modeset_setup.c | 13 ++++++------- 5 files changed, 29 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 3fdd8a517983..ac7fe6281afe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -609,6 +609,13 @@ struct intel_connector { * and active (i.e. dpms ON state). */ bool (*get_hw_state)(struct intel_connector *); + /* + * Optional hook called during init/resume to sync any state + * stored in the connector (eg. DSC state) wrt. the HW state. 
+ */ + void (*sync_state)(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state); + /* Panel info for eDP and LVDS */ struct intel_panel panel; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index ae647d03af25..38efc8d177d0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5859,6 +5859,19 @@ intel_dp_connector_unregister(struct drm_connector *connector) intel_connector_unregister(connector); } +void intel_dp_connector_sync_state(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(connector->base.dev); + + if (crtc_state && crtc_state->dsc.compression_enable) { + drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); + connector->dp.dsc_decompression_enabled = true; + } else { + connector->dp.dsc_decompression_enabled = false; + } +} + void intel_dp_encoder_flush_work(struct drm_encoder *encoder) { struct intel_digital_port *dig_port = enc_to_dig_port(to_intel_encoder(encoder)); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 05db46b111f2..375d0677cd8c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -45,6 +45,8 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, int intel_dp_min_bpp(enum intel_output_format output_format); bool intel_dp_init_connector(struct intel_digital_port *dig_port, struct intel_connector *intel_connector); +void intel_dp_connector_sync_state(struct intel_connector *connector, + const struct intel_crtc_state *crtc_state); void intel_dp_set_link_params(struct intel_dp *intel_dp, int link_rate, int lane_count); int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 
8a9432335030..a01a59f57ae5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1534,6 +1534,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo return NULL; intel_connector->get_hw_state = intel_dp_mst_get_hw_state; + intel_connector->sync_state = intel_dp_connector_sync_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; drm_dp_mst_get_port_malloc(port); diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 94eece7f63be..caeca3a8442c 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -318,12 +318,6 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); - if (crtc_state->dsc.compression_enable) { - drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux); - connector->dp.dsc_decompression_enabled = true; - } else { - connector->dp.dsc_decompression_enabled = false; - } conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3; } } @@ -775,8 +769,9 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) drm_connector_list_iter_begin(&i915->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { + struct intel_crtc_state *crtc_state = NULL; + if (connector->get_hw_state(connector)) { - struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; connector->base.dpms = DRM_MODE_DPMS_ON; @@ -802,6 +797,10 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; } + + if (connector->sync_state) + connector->sync_state(connector, crtc_state); + drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] hw state readout: %s\n", connector->base.base.id, connector->base.name, -- cgit v1.2.3 From 
575801663c7dc38f826212b39e3b91a4a8661c33 Mon Sep 17 00:00:00 2001 From: Edmund Raile Date: Thu, 29 Feb 2024 14:47:59 +0000 Subject: firewire: ohci: prevent leak of left-over IRQ on unbind Commit 5a95f1ded28691e6 ("firewire: ohci: use devres for requested IRQ") also removed the call to free_irq() in pci_remove(), leading to a leftover irq of devm_request_irq() at pci_disable_msi() in pci_remove() when unbinding the driver from the device remove_proc_entry: removing non-empty directory 'irq/136', leaking at least 'firewire_ohci' Call Trace: ? remove_proc_entry+0x19c/0x1c0 ? __warn+0x81/0x130 ? remove_proc_entry+0x19c/0x1c0 ? report_bug+0x171/0x1a0 ? console_unlock+0x78/0x120 ? handle_bug+0x3c/0x80 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? remove_proc_entry+0x19c/0x1c0 unregister_irq_proc+0xf4/0x120 free_desc+0x3d/0xe0 ? kfree+0x29f/0x2f0 irq_free_descs+0x47/0x70 msi_domain_free_locked.part.0+0x19d/0x1d0 msi_domain_free_irqs_all_locked+0x81/0xc0 pci_free_msi_irqs+0x12/0x40 pci_disable_msi+0x4c/0x60 pci_remove+0x9d/0xc0 [firewire_ohci 01b483699bebf9cb07a3d69df0aa2bee71db1b26] pci_device_remove+0x37/0xa0 device_release_driver_internal+0x19f/0x200 unbind_store+0xa1/0xb0 remove irq with devm_free_irq() before pci_disable_msi() also remove it in fail_msi: of pci_probe() as this would lead to an identical leak Cc: stable@vger.kernel.org Fixes: 5a95f1ded28691e6 ("firewire: ohci: use devres for requested IRQ") Signed-off-by: Edmund Raile Link: https://lore.kernel.org/r/20240229144723.13047-2-edmund.raile@proton.me Signed-off-by: Takashi Sakamoto --- drivers/firewire/ohci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9db9290c3269..7bc71f4be64a 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -3773,6 +3773,7 @@ static int pci_probe(struct pci_dev *dev, return 0; fail_msi: + devm_free_irq(&dev->dev, dev->irq, ohci); pci_disable_msi(dev); return err; @@ 
-3800,6 +3801,7 @@ static void pci_remove(struct pci_dev *dev) software_reset(ohci); + devm_free_irq(&dev->dev, dev->irq, ohci); pci_disable_msi(dev); dev_notice(&dev->dev, "removing fw-ohci device\n"); -- cgit v1.2.3 From 984318aaf7b6516d03a2971a4a37bab4ea648461 Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Thu, 29 Feb 2024 10:07:16 +0530 Subject: drm/i915/panelreplay: Move out psr_init_dpcd() from init_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move psr_init_dpcd() from init-connector to connector-detect function. The dpcd probe for checking panel replay capability for external dp connector is causing delay during boot which can be optimized by moving dpcd probe to connector specific detect(). v1: Initial version. v2: Add details in commit description. [Jani] Suggested-by: Ville Syrjälä Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10284 Signed-off-by: Animesh Manna Fixes: cceeaa312d39 ("drm/i915/panelreplay: Enable panel replay dpcd initialization for DP") Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240229043716.4065760-1-animesh.manna@intel.com (cherry picked from commit 1cca19bf296fae0636a637b48d195ac6b4d430c9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ drivers/gpu/drm/i915/display/intel_psr.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 38efc8d177d0..94d2a15d8444 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5699,6 +5699,9 @@ intel_dp_detect(struct drm_connector *connector, goto out; } + if (!intel_dp_is_edp(intel_dp)) + intel_psr_init_dpcd(intel_dp); + intel_dp_detect_dsc_caps(intel_dp, intel_connector); intel_dp_configure_mst(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c 
b/drivers/gpu/drm/i915/display/intel_psr.c index 57bbf3e3af92..4faaf4b3fc53 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2776,9 +2776,6 @@ void intel_psr_init(struct intel_dp *intel_dp) if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv))) return; - if (!intel_dp_is_edp(intel_dp)) - intel_psr_init_dpcd(intel_dp); - /* * HSW spec explicitly says PSR is tied to port A. * BDW+ platforms have a instance of PSR registers per transcoder but -- cgit v1.2.3 From 8d0d2447394b13fb22a069f0330f9c49b7fff9d3 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 6 Mar 2024 15:03:03 +0900 Subject: nvme: host: fix double-free of struct nvme_id_ns in ns_update_nuse() When nvme_identify_ns() fails, it frees the pointer to the struct nvme_id_ns before it returns. However, ns_update_nuse() calls kfree() for the pointer even when nvme_identify_ns() fails. This results in KASAN double-free, which was observed with blktests nvme/045 with proposed patches [1] on the kernel v6.8-rc7. Fix the double-free by skipping kfree() when nvme_identify_ns() fails. 
Link: https://lore.kernel.org/linux-block/20240304161303.19681-1-dwagner@suse.de/ [1] Fixes: a1a825ab6a60 ("nvme: add csi, ms and nuse to sysfs") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index d099218e494a..6c7f1d5c056f 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -221,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns) ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id); if (ret) - goto out_free_id; + return ret; ns->head->nuse = le64_to_cpu(id->nuse); - -out_free_id: kfree(id); - - return ret; + return 0; } static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, -- cgit v1.2.3 From 7e80eb792bd7377a20f204943ac31c77d859be89 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 Mar 2024 06:20:30 -0800 Subject: nvme: clear caller pointer on identify failure The memory allocated for the identification is freed on failure. Set it to NULL so the caller doesn't have a pointer to that freed address. 
Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c4d928585ce3..2baf5786a92f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1403,8 +1403,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, sizeof(struct nvme_id_ctrl)); - if (error) + if (error) { kfree(*id); + *id = NULL; + } return error; } @@ -1533,6 +1535,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); kfree(*id); + *id = NULL; } return error; } -- cgit v1.2.3 From d2d7b8e88023b75320662c2305d61779ff060950 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:27 +0100 Subject: phy: qcom-qmp-combo: fix drm bridge registration Due to a long-standing issue in driver core, drivers may not probe defer after having registered child devices to avoid triggering a probe deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). This could potentially also trigger a bug in the DRM bridge implementation which does not expect bridges to go away even if device links may avoid triggering this (when enabled). Move registration of the DRM aux bridge to after looking up clocks and other resources. Note that PHY creation can in theory also trigger a probe deferral when a 'phy' supply is used. This does not seem to affect the QMP PHY driver but the PHY subsystem should be reworked to address this (i.e. by separating initialisation and registration of the PHY). 
Fixes: 35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE") Fixes: 1904c3f578dc ("phy: qcom-qmp-combo: Introduce drm_bridge") Cc: stable@vger.kernel.org # 6.5 Cc: Bjorn Andersson Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Acked-by: Vinod Koul Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20240217150228.5788-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 1ad10110dd25..e19d6a084f10 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -3566,10 +3566,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; - ret = drm_aux_bridge_register(dev); - if (ret) - return ret; - /* Check for legacy binding with child nodes. */ usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); if (usb_np) { @@ -3589,6 +3585,10 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) goto err_node_put; + ret = drm_aux_bridge_register(dev); + if (ret) + goto err_node_put; + pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) -- cgit v1.2.3 From 47b412c1ea77112f1148b4edd71700a388c7c80f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:28 +0100 Subject: phy: qcom-qmp-combo: fix type-c switch registration Due to a long-standing issue in driver core, drivers may not probe defer after having registered child devices to avoid triggering a probe deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). Move registration of the typec switch to after looking up clocks and other resources. Note that PHY creation can in theory also trigger a probe deferral when a 'phy' supply is used. 
This does not seem to affect the QMP PHY driver but the PHY subsystem should be reworked to address this (i.e. by separating initialisation and registration of the PHY). Fixes: 2851117f8f42 ("phy: qcom-qmp-combo: Introduce orientation switching") Cc: stable@vger.kernel.org # 6.5 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Acked-by: Vinod Koul Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20240217150228.5788-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index e19d6a084f10..17c4ad7553a5 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -3562,10 +3562,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; - ret = qmp_combo_typec_switch_register(qmp); - if (ret) - return ret; - /* Check for legacy binding with child nodes. */ usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); if (usb_np) { @@ -3585,6 +3581,10 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) goto err_node_put; + ret = qmp_combo_typec_switch_register(qmp); + if (ret) + goto err_node_put; + ret = drm_aux_bridge_register(dev); if (ret) goto err_node_put; -- cgit v1.2.3 From 317f86dc1b8e219e799271042a17d56a95a935bc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Mar 2024 06:37:22 -0800 Subject: Revert "drm/udl: Add ARGB8888 as a format" This reverts commit 95bf25bb9ed5dedb7fb39f76489f7d6843ab0475. Apparently there was a previous discussion about emulation of formats and it was decided XRGB8888 was the only format to support for legacy userspace [1]. Remove ARGB8888. Userspace needs to be fixed to accept XRGB8888. 
[1] https://lore.kernel.org/r/60dc7697-d7a0-4bf4-a22e-32f1bbb792c2@suse.de Acked-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240306063721.1.I4a32475190334e1fa4eef4700ecd2787a43c94b5@changeid --- drivers/gpu/drm/udl/udl_modeset.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 0f8d3678770e..7702359c90c2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -253,7 +253,6 @@ static int udl_handle_damage(struct drm_framebuffer *fb, static const uint32_t udl_primary_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, }; static const uint64_t udl_primary_plane_fmtmods[] = { -- cgit v1.2.3 From 41463f2dfde2824a817789d635be8111cff463f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Feb 2024 05:37:40 -0800 Subject: dasd: cleamup dasd_state_basic_to_ready Reflow dasd_state_basic_to_ready a bit to make it easier to modify. 
Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20240228133742.806274-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 54 +++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 2f3adf5d8fee..e754e4f81b2d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -307,39 +307,37 @@ static int dasd_state_basic_to_known(struct dasd_device *device) */ static int dasd_state_basic_to_ready(struct dasd_device *device) { - int rc; - struct dasd_block *block; - struct gendisk *disk; + struct dasd_block *block = device->block; + int rc = 0; - rc = 0; - block = device->block; /* make disk known with correct capacity */ - if (block) { - if (block->base->discipline->do_analysis != NULL) - rc = block->base->discipline->do_analysis(block); - if (rc) { - if (rc != -EAGAIN) { - device->state = DASD_STATE_UNFMT; - disk = device->block->gdp; - kobject_uevent(&disk_to_dev(disk)->kobj, - KOBJ_CHANGE); - goto out; - } - return rc; - } - if (device->discipline->setup_blk_queue) - device->discipline->setup_blk_queue(block); - set_capacity(block->gdp, - block->blocks << block->s2b_shift); + if (!block) { device->state = DASD_STATE_READY; - rc = dasd_scan_partitions(block); - if (rc) { - device->state = DASD_STATE_BASIC; + goto out; + } + + if (block->base->discipline->do_analysis != NULL) + rc = block->base->discipline->do_analysis(block); + if (rc) { + if (rc == -EAGAIN) return rc; - } - } else { - device->state = DASD_STATE_READY; + device->state = DASD_STATE_UNFMT; + kobject_uevent(&disk_to_dev(device->block->gdp)->kobj, + KOBJ_CHANGE); + goto out; } + + if (device->discipline->setup_blk_queue) + device->discipline->setup_blk_queue(block); + set_capacity(block->gdp, block->blocks << block->s2b_shift); + device->state = DASD_STATE_READY; + + rc = 
dasd_scan_partitions(block); + if (rc) { + device->state = DASD_STATE_BASIC; + return rc; + } + out: if (device->discipline->basic_to_ready) rc = device->discipline->basic_to_ready(device); -- cgit v1.2.3 From 0127a47f58c6bb7b54386960ee66864b937269eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Feb 2024 05:37:41 -0800 Subject: dasd: move queue setup to common code Most of the code in setup_blk_queue is shared between all disciplines. Move it to common code and leave a method to query the maximum number of transferable blocks, and a flag to indicate discard support. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20240228133742.806274-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 29 +++++++++++++++++++++++++++-- drivers/s390/block/dasd_diag.c | 22 +++------------------- drivers/s390/block/dasd_eckd.c | 29 ++++++----------------------- drivers/s390/block/dasd_fba.c | 33 ++++----------------------------- drivers/s390/block/dasd_int.h | 6 ++---- 5 files changed, 42 insertions(+), 77 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e754e4f81b2d..bdeab447adfc 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -308,6 +308,7 @@ static int dasd_state_basic_to_known(struct dasd_device *device) static int dasd_state_basic_to_ready(struct dasd_device *device) { struct dasd_block *block = device->block; + struct request_queue *q; int rc = 0; /* make disk known with correct capacity */ @@ -327,8 +328,32 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) goto out; } - if (device->discipline->setup_blk_queue) - device->discipline->setup_blk_queue(block); + q = block->gdp->queue; + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + q->limits.max_dev_sectors = device->discipline->max_sectors(block); + blk_queue_max_hw_sectors(q, q->limits.max_dev_sectors); + blk_queue_logical_block_size(q, 
block->bp_block); + blk_queue_max_segments(q, USHRT_MAX); + + /* With page sized segments each segment can be translated into one idaw/tidaw */ + blk_queue_max_segment_size(q, PAGE_SIZE); + blk_queue_segment_boundary(q, PAGE_SIZE - 1); + blk_queue_dma_alignment(q, PAGE_SIZE - 1); + + if (device->discipline->has_discard) { + unsigned int max_bytes, max_discard_sectors; + + q->limits.discard_granularity = block->bp_block; + + /* Calculate max_discard_sectors and make it PAGE aligned */ + max_bytes = USHRT_MAX * block->bp_block; + max_bytes = ALIGN_DOWN(max_bytes, PAGE_SIZE); + max_discard_sectors = max_bytes / block->bp_block; + + blk_queue_max_discard_sectors(q, max_discard_sectors); + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + } + set_capacity(block->gdp, block->blocks << block->s2b_shift); device->state = DASD_STATE_READY; diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 041088c7e909..ea4b1d01bb76 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -617,25 +617,9 @@ dasd_diag_dump_sense(struct dasd_device *device, struct dasd_ccw_req * req, "dump sense not available for DIAG data"); } -/* - * Initialize block layer request queue. 
- */ -static void dasd_diag_setup_blk_queue(struct dasd_block *block) +static unsigned int dasd_diag_max_sectors(struct dasd_block *block) { - unsigned int logical_block_size = block->bp_block; - struct request_queue *q = block->gdp->queue; - int max; - - max = DIAG_MAX_BLOCKS << block->s2b_shift; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - q->limits.max_dev_sectors = max; - blk_queue_logical_block_size(q, logical_block_size); - blk_queue_max_hw_sectors(q, max); - blk_queue_max_segments(q, USHRT_MAX); - /* With page sized segments each segment can be translated into one idaw/tidaw */ - blk_queue_max_segment_size(q, PAGE_SIZE); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_dma_alignment(q, PAGE_SIZE - 1); + return DIAG_MAX_BLOCKS << block->s2b_shift; } static int dasd_diag_pe_handler(struct dasd_device *device, @@ -648,10 +632,10 @@ static struct dasd_discipline dasd_diag_discipline = { .owner = THIS_MODULE, .name = "DIAG", .ebcname = "DIAG", + .max_sectors = dasd_diag_max_sectors, .check_device = dasd_diag_check_device, .pe_handler = dasd_diag_pe_handler, .fill_geometry = dasd_diag_fill_geometry, - .setup_blk_queue = dasd_diag_setup_blk_queue, .start_IO = dasd_start_diag, .term_IO = dasd_diag_term_IO, .handle_terminated_request = dasd_diag_handle_terminated_request, diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8aade17d885c..373c1a86c33e 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -6826,17 +6826,9 @@ static void dasd_eckd_handle_hpf_error(struct dasd_device *device, dasd_schedule_requeue(device); } -/* - * Initialize block layer request queue. 
- */ -static void dasd_eckd_setup_blk_queue(struct dasd_block *block) +static unsigned int dasd_eckd_max_sectors(struct dasd_block *block) { - unsigned int logical_block_size = block->bp_block; - struct request_queue *q = block->gdp->queue; - struct dasd_device *device = block->base; - int max; - - if (device->features & DASD_FEATURE_USERAW) { + if (block->base->features & DASD_FEATURE_USERAW) { /* * the max_blocks value for raw_track access is 256 * it is higher than the native ECKD value because we @@ -6844,19 +6836,10 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) * so the max_hw_sectors are * 2048 x 512B = 1024kB = 16 tracks */ - max = DASD_ECKD_MAX_BLOCKS_RAW << block->s2b_shift; - } else { - max = DASD_ECKD_MAX_BLOCKS << block->s2b_shift; + return DASD_ECKD_MAX_BLOCKS_RAW << block->s2b_shift; } - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - q->limits.max_dev_sectors = max; - blk_queue_logical_block_size(q, logical_block_size); - blk_queue_max_hw_sectors(q, max); - blk_queue_max_segments(q, USHRT_MAX); - /* With page sized segments each segment can be translated into one idaw/tidaw */ - blk_queue_max_segment_size(q, PAGE_SIZE); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_dma_alignment(q, PAGE_SIZE - 1); + + return DASD_ECKD_MAX_BLOCKS << block->s2b_shift; } static struct ccw_driver dasd_eckd_driver = { @@ -6888,7 +6871,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .basic_to_ready = dasd_eckd_basic_to_ready, .online_to_ready = dasd_eckd_online_to_ready, .basic_to_known = dasd_eckd_basic_to_known, - .setup_blk_queue = dasd_eckd_setup_blk_queue, + .max_sectors = dasd_eckd_max_sectors, .fill_geometry = dasd_eckd_fill_geometry, .start_IO = dasd_start_IO, .term_IO = dasd_term_IO, diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 045e548630df..bcbb2f8e91fe 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -748,35 +748,9 @@ dasd_fba_dump_sense(struct 
dasd_device *device, struct dasd_ccw_req * req, free_page((unsigned long) page); } -/* - * Initialize block layer request queue. - */ -static void dasd_fba_setup_blk_queue(struct dasd_block *block) +static unsigned int dasd_fba_max_sectors(struct dasd_block *block) { - unsigned int logical_block_size = block->bp_block; - struct request_queue *q = block->gdp->queue; - unsigned int max_bytes, max_discard_sectors; - int max; - - max = DASD_FBA_MAX_BLOCKS << block->s2b_shift; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - q->limits.max_dev_sectors = max; - blk_queue_logical_block_size(q, logical_block_size); - blk_queue_max_hw_sectors(q, max); - blk_queue_max_segments(q, USHRT_MAX); - /* With page sized segments each segment can be translated into one idaw/tidaw */ - blk_queue_max_segment_size(q, PAGE_SIZE); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - - q->limits.discard_granularity = logical_block_size; - - /* Calculate max_discard_sectors and make it PAGE aligned */ - max_bytes = USHRT_MAX * logical_block_size; - max_bytes = ALIGN_DOWN(max_bytes, PAGE_SIZE); - max_discard_sectors = max_bytes / logical_block_size; - - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + return DASD_FBA_MAX_BLOCKS << block->s2b_shift; } static int dasd_fba_pe_handler(struct dasd_device *device, @@ -789,10 +763,11 @@ static struct dasd_discipline dasd_fba_discipline = { .owner = THIS_MODULE, .name = "FBA ", .ebcname = "FBA ", + .has_discard = true, .check_device = dasd_fba_check_characteristics, .do_analysis = dasd_fba_do_analysis, .pe_handler = dasd_fba_pe_handler, - .setup_blk_queue = dasd_fba_setup_blk_queue, + .max_sectors = dasd_fba_max_sectors, .fill_geometry = dasd_fba_fill_geometry, .start_IO = dasd_start_IO, .term_IO = dasd_term_IO, diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index a6c5f1fa2d87..b56d683a991d 100644 --- a/drivers/s390/block/dasd_int.h +++ 
b/drivers/s390/block/dasd_int.h @@ -293,6 +293,7 @@ struct dasd_discipline { struct module *owner; char ebcname[8]; /* a name used for tagging and printks */ char name[8]; /* a name used for tagging and printks */ + bool has_discard; struct list_head list; /* used for list of disciplines */ @@ -331,10 +332,7 @@ struct dasd_discipline { int (*online_to_ready) (struct dasd_device *); int (*basic_to_known)(struct dasd_device *); - /* - * Initialize block layer request queue. - */ - void (*setup_blk_queue)(struct dasd_block *); + unsigned int (*max_sectors)(struct dasd_block *); /* (struct dasd_device *); * Device operation functions. build_cp creates a ccw chain for * a block device request, start_io starts the request and -- cgit v1.2.3 From fde07a4d74e3b511105e0b6c9372d42376fbbecb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Feb 2024 05:37:42 -0800 Subject: dasd: use the atomic queue limits API Pass the constant limits directly to blk_mq_alloc_disk, set the nonrot flag there as well, and then use the commit API to change the transfer size and logical block size dependent values. This relies on the assumption that no I/O can be pending before the devices moves into the ready state and doesn't need extra freezing for changes to the queue limits. 
Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20240228133742.806274-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 29 ++++++++++++----------------- drivers/s390/block/dasd_genhd.c | 13 ++++++++++++- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index bdeab447adfc..e8eb710bd25d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -308,7 +308,7 @@ static int dasd_state_basic_to_known(struct dasd_device *device) static int dasd_state_basic_to_ready(struct dasd_device *device) { struct dasd_block *block = device->block; - struct request_queue *q; + struct queue_limits lim; int rc = 0; /* make disk known with correct capacity */ @@ -328,31 +328,26 @@ static int dasd_state_basic_to_ready(struct dasd_device *device) goto out; } - q = block->gdp->queue; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - q->limits.max_dev_sectors = device->discipline->max_sectors(block); - blk_queue_max_hw_sectors(q, q->limits.max_dev_sectors); - blk_queue_logical_block_size(q, block->bp_block); - blk_queue_max_segments(q, USHRT_MAX); - - /* With page sized segments each segment can be translated into one idaw/tidaw */ - blk_queue_max_segment_size(q, PAGE_SIZE); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_dma_alignment(q, PAGE_SIZE - 1); + lim = queue_limits_start_update(block->gdp->queue); + lim.max_dev_sectors = device->discipline->max_sectors(block); + lim.max_hw_sectors = lim.max_dev_sectors; + lim.logical_block_size = block->bp_block; if (device->discipline->has_discard) { - unsigned int max_bytes, max_discard_sectors; + unsigned int max_bytes; - q->limits.discard_granularity = block->bp_block; + lim.discard_granularity = block->bp_block; /* Calculate max_discard_sectors and make it PAGE aligned */ max_bytes = USHRT_MAX * block->bp_block; max_bytes = ALIGN_DOWN(max_bytes, PAGE_SIZE); - 
max_discard_sectors = max_bytes / block->bp_block; - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + lim.max_hw_discard_sectors = max_bytes / block->bp_block; + lim.max_write_zeroes_sectors = lim.max_hw_discard_sectors; } + rc = queue_limits_commit_update(block->gdp->queue, &lim); + if (rc) + return rc; set_capacity(block->gdp, block->blocks << block->s2b_shift); device->state = DASD_STATE_READY; diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 0465b706745f..528e2d38d9bf 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -34,6 +34,16 @@ MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD d */ int dasd_gendisk_alloc(struct dasd_block *block) { + struct queue_limits lim = { + /* + * With page sized segments, each segment can be translated into + * one idaw/tidaw. + */ + .max_segment_size = PAGE_SIZE, + .seg_boundary_mask = PAGE_SIZE - 1, + .dma_alignment = PAGE_SIZE - 1, + .max_segments = USHRT_MAX, + }; struct gendisk *gdp; struct dasd_device *base; int len, rc; @@ -53,11 +63,12 @@ int dasd_gendisk_alloc(struct dasd_block *block) if (rc) return rc; - gdp = blk_mq_alloc_disk(&block->tag_set, NULL, block); + gdp = blk_mq_alloc_disk(&block->tag_set, &lim, block); if (IS_ERR(gdp)) { blk_mq_free_tag_set(&block->tag_set); return PTR_ERR(gdp); } + blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue); /* Initialize gendisk structure. */ gdp->major = DASD_MAJOR; -- cgit v1.2.3 From f8c7511db009d42e2c24e48eeb04e3f1b67ab209 Mon Sep 17 00:00:00 2001 From: "Ricardo B. 
Marliere" Date: Tue, 5 Mar 2024 16:32:16 -0300 Subject: block: make block_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the block_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305-class_cleanup-block-v1-1-130bb27b9c72@marliere.net Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- drivers/base/base.h | 2 +- include/linux/blkdev.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index 84c822d989da..a214f9cf3a35 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1201,7 +1201,7 @@ static int block_uevent(const struct device *dev, struct kobj_uevent_env *env) return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq); } -struct class block_class = { +const struct class block_class = { .name = "block", .dev_uevent = block_uevent, }; diff --git a/drivers/base/base.h b/drivers/base/base.h index eb4c0ace9242..0738ccad08b2 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -207,7 +207,7 @@ static inline int devtmpfs_init(void) { return 0; } #endif #ifdef CONFIG_BLOCK -extern struct class block_class; +extern const struct class block_class; static inline bool is_blockdev(struct device *dev) { return dev->class == &block_class; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 285e82723d64..19c7596f4ebf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -42,7 +42,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg 
policies allowed to be registered concurrently. -- cgit v1.2.3 From aa067325c05dc3a3aac588f40cacf8418f916cee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:35 -0700 Subject: drbd: pass the max_hw_sectors limit to blk_alloc_disk Pass a queue_limits structure with the max_hw_sectors limit to blk_alloc_disk instead of updating the limit on the allocated gendisk. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cea1e537fd56..113b441d4d36 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig int id; int vnr = adm_ctx->volume; enum drbd_ret_code err = ERR_NOMEM; + struct queue_limits lim = { + /* + * Setting the max_hw_sectors to an odd value of 8kibyte here. + * This triggers a max_bio_size message upon first attach or + * connect. 
+ */ + .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, + }; device = minor_to_device(minor); if (device) @@ -2708,7 +2716,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_no_disk; @@ -2729,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); - /* Setting the max_hw_sectors to an odd value of 8kibyte here - This triggers a max_bio_size message upon first attach or connect */ - blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) -- cgit v1.2.3 From 342d81fde24152adf9747e6e126c8c3179d1a54c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:36 -0700 Subject: drbd: refactor drbd_reconsider_queue_parameters Split out a drbd_max_peer_bio_size helper for the peer I/O size, and condense the various checks to a nested min3(..., max())) instead of using a lot of local variables. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 84 ++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43747a1aae43..9135001a8e57 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1189,6 +1189,33 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } +static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) +{ + /* + * We may ignore peer limits if the peer is modern enough. 
From 8.3.8 + * onwards the peer can use multiple BIOs for a single peer_request. + */ + if (device->state.conn < C_WF_REPORT_PARAMS) + return device->peer_max_bio_size; + + if (first_peer_device(device)->connection->agreed_pro_version < 94) + return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + + /* + * Correct old drbd (up to 8.3.7) if it believes it can do more than + * 32KiB. + */ + if (first_peer_device(device)->connection->agreed_pro_version == 94) + return DRBD_MAX_SIZE_H80_PACKET; + + /* + * drbd 8.3.8 onwards, before 8.4.0 + */ + if (first_peer_device(device)->connection->agreed_pro_version < 100) + return DRBD_MAX_BIO_SIZE_P95; + return DRBD_MAX_BIO_SIZE; +} + static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) { q->limits.discard_granularity = granularity; @@ -1303,48 +1330,35 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi fixup_discard_support(device, q); } -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) +void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o) { - unsigned int now, new, local, peer; - - now = queue_max_hw_sectors(device->rq_queue) << 9; - local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ + unsigned int now = queue_max_hw_sectors(device->rq_queue) << + SECTOR_SHIFT; + unsigned int new; if (bdev) { - local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; - device->local_max_bio_size = local; - } - local = min(local, DRBD_MAX_BIO_SIZE); - - /* We may ignore peer limits if the peer is modern enough. 
- Because new from 8.3.8 onwards the peer can use multiple - BIOs for a single peer_request */ - if (device->state.conn >= C_WF_REPORT_PARAMS) { - if (first_peer_device(device)->connection->agreed_pro_version < 94) - peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (first_peer_device(device)->connection->agreed_pro_version == 94) - peer = DRBD_MAX_SIZE_H80_PACKET; - else if (first_peer_device(device)->connection->agreed_pro_version < 100) - peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ - else - peer = DRBD_MAX_BIO_SIZE; + struct request_queue *b = bdev->backing_bdev->bd_disk->queue; - /* We may later detach and re-attach on a disconnected Primary. - * Avoid this setting to jump back in that case. - * We want to store what we know the peer DRBD can handle, - * not what the peer IO backend can handle. */ - if (peer > device->peer_max_bio_size) - device->peer_max_bio_size = peer; + device->local_max_bio_size = + queue_max_hw_sectors(b) << SECTOR_SHIFT; } - new = min(local, peer); - if (device->state.role == R_PRIMARY && new < now) - drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); - - if (new != now) + /* + * We may later detach and re-attach on a disconnected Primary. Avoid + * decreasing the value in this case. + * + * We want to store what we know the peer DRBD can handle, not what the + * peer IO backend can handle. 
+ */ + new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, + max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); + if (new != now) { + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", + new, now); drbd_info(device, "max BIO size = %u\n", new); + } drbd_setup_queue_param(device, bdev, new, o); } -- cgit v1.2.3 From 2828908d5cc8396e7c91d04d67e03ed834234bcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:28 +0100 Subject: drbd: refactor the backing dev max_segments calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out a drbd_backing_dev_max_segments helper that checks the backing device limitation. Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-4-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9135001a8e57..0326b7322ceb 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1295,30 +1295,39 @@ static void fixup_discard_support(struct drbd_device *device, struct request_que } } +/* This is the workaround for "bio would need to, but cannot, be split" */ +static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) +{ + unsigned int max_segments; + + rcu_read_lock(); + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); + + if (!max_segments) + return BLK_MAX_SEGMENTS; + return max_segments; +} + static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int max_bio_size, struct o_qlim *o) { struct request_queue * const q = 
device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = 0; + unsigned int max_segments = BLK_MAX_SEGMENTS; struct request_queue *b = NULL; - struct disk_conf *dc; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - max_segments = dc->max_bio_bvecs; - rcu_read_unlock(); + max_segments = drbd_backing_dev_max_segments(device); blk_set_stacking_limits(&q->limits); } blk_queue_max_hw_sectors(q, max_hw_sectors); - /* This is the workaround for "bio would need to, but cannot, be split" */ - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); + blk_queue_max_segments(q, max_segments); blk_queue_segment_boundary(q, PAGE_SIZE-1); decide_on_discard_support(device, bdev); -- cgit v1.2.3 From e16344e506314e35b1a5a8ccd7b88f4b1844ebb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:29 +0100 Subject: drbd: merge drbd_setup_queue_param into drbd_reconsider_queue_parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drbd_setup_queue_param is only called by drbd_reconsider_queue_parameters and there is no really clear boundary of responsibilities between the two. 
Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-5-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 56 +++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0326b7322ceb..0f40fdee0899 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1309,45 +1309,16 @@ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) return max_segments; } -static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size, struct o_qlim *o) -{ - struct request_queue * const q = device->rq_queue; - unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = BLK_MAX_SEGMENTS; - struct request_queue *b = NULL; - - if (bdev) { - b = bdev->backing_bdev->bd_disk->queue; - - max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - max_segments = drbd_backing_dev_max_segments(device); - - blk_set_stacking_limits(&q->limits); - } - - blk_queue_max_hw_sectors(q, max_hw_sectors); - blk_queue_max_segments(q, max_segments); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, bdev); - - if (b) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); - } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); -} - void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { - unsigned int now = queue_max_hw_sectors(device->rq_queue) << - SECTOR_SHIFT; + struct request_queue * const q = device->rq_queue; + unsigned int now = queue_max_hw_sectors(q) << 9; + struct request_queue *b = NULL; unsigned int new; if (bdev) { - struct 
request_queue *b = bdev->backing_bdev->bd_disk->queue; + b = bdev->backing_bdev->bd_disk->queue; device->local_max_bio_size = queue_max_hw_sectors(b) << SECTOR_SHIFT; @@ -1369,7 +1340,24 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, drbd_info(device, "max BIO size = %u\n", new); } - drbd_setup_queue_param(device, bdev, new, o); + if (bdev) { + blk_set_stacking_limits(&q->limits); + blk_queue_max_segments(q, + drbd_backing_dev_max_segments(device)); + } else { + blk_queue_max_segments(q, BLK_MAX_SEGMENTS); + } + + blk_queue_max_hw_sectors(q, new >> SECTOR_SHIFT); + blk_queue_segment_boundary(q, PAGE_SIZE - 1); + decide_on_discard_support(device, bdev); + + if (bdev) { + blk_stack_limits(&q->limits, &b->limits, 0); + disk_update_readahead(device->vdisk); + } + fixup_write_zeroes(device, q); + fixup_discard_support(device, q); } /* Starts the worker thread */ -- cgit v1.2.3 From e3992e02c970f6eb803b98b9f733cad40f190161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:30 +0100 Subject: drbd: don't set max_write_zeroes_sectors in decide_on_discard_support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixup_write_zeroes always overrides the max_write_zeroes_sectors value a little further down the callchain, so don't bother to setup a limit in decide_on_discard_support. 
Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-6-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0f40fdee0899..a79b7fe5335d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1260,7 +1260,6 @@ static void decide_on_discard_support(struct drbd_device *device, blk_queue_discard_granularity(q, 512); max_discard_sectors = drbd_max_discard_sectors(connection); blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); return; not_supported: -- cgit v1.2.3 From 5eaee6e9c8f9940ecee93678972774fb8dd450d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:31 +0100 Subject: drbd: split out a drbd_discard_supported helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to check if discard is supported for a given connection / backing device combination. 
Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-7-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a79b7fe5335d..94ed2b3ea636 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1231,24 +1231,33 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) return AL_EXTENT_SIZE >> 9; } -static void decide_on_discard_support(struct drbd_device *device, +static bool drbd_discard_supported(struct drbd_connection *connection, struct drbd_backing_dev *bdev) { - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) - goto not_supported; + return false; if (connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - goto not_supported; + return false; } + return true; +} + +static void decide_on_discard_support(struct drbd_device *device, + struct drbd_backing_dev *bdev) +{ + struct drbd_connection *connection = + first_peer_device(device)->connection; + struct request_queue *q = device->rq_queue; + unsigned int max_discard_sectors; + + if (!drbd_discard_supported(connection, bdev)) + goto not_supported; + /* * We don't care for the granularity, really. 
* -- cgit v1.2.3 From e6dfe748f09e37f77437bd337f891f5b57d5d5a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:41 -0700 Subject: drbd: atomically update queue limits in drbd_reconsider_queue_parameters Switch drbd_reconsider_queue_parameters to set up the queue parameters in an on-stack queue_limits structure and apply them atomically. Remove various helpers that have become so trivial that they can be folded into drbd_reconsider_queue_parameters. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 119 +++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 73 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 94ed2b3ea636..fbd92803dc1d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1216,11 +1216,6 @@ static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) return DRBD_MAX_BIO_SIZE; } -static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) -{ - q->limits.discard_granularity = granularity; -} - static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) { /* when we introduced REQ_WRITE_SAME support, we also bumped @@ -1247,62 +1242,6 @@ static bool drbd_discard_supported(struct drbd_connection *connection, return true; } -static void decide_on_discard_support(struct drbd_device *device, - struct drbd_backing_dev *bdev) -{ - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - - if (!drbd_discard_supported(connection, bdev)) - goto not_supported; - - /* - * We don't care for the granularity, really. - * - * Stacking limits below should fix it for the local device.
Whether or - * not it is a suitable granularity on the remote device is not our - * problem, really. If you care, you need to use devices with similar - * topology on all peers. - */ - blk_queue_discard_granularity(q, 512); - max_discard_sectors = drbd_max_discard_sectors(connection); - blk_queue_max_discard_sectors(q, max_discard_sectors); - return; - -not_supported: - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); -} - -static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) -{ - /* Fixup max_write_zeroes_sectors after blk_stack_limits(): - * if we can handle "zeroes" efficiently on the protocol, - * we want to do that, even if our backend does not announce - * max_write_zeroes_sectors itself. */ - struct drbd_connection *connection = first_peer_device(device)->connection; - /* If the peer announces WZEROES support, use it. Otherwise, rather - * send explicit zeroes than rely on some discard-zeroes-data magic. */ - if (connection->agreed_features & DRBD_FF_WZEROES) - q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; - else - q->limits.max_write_zeroes_sectors = 0; -} - -static void fixup_discard_support(struct drbd_device *device, struct request_queue *q) -{ - unsigned int max_discard = device->rq_queue->limits.max_discard_sectors; - unsigned int discard_granularity = - device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT; - - if (discard_granularity > max_discard) { - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); - } -} - /* This is the workaround for "bio would need to, but cannot, be split" */ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) { @@ -1320,8 +1259,11 @@ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { + struct drbd_connection *connection = + 
first_peer_device(device)->connection; struct request_queue * const q = device->rq_queue; unsigned int now = queue_max_hw_sectors(q) << 9; + struct queue_limits lim; struct request_queue *b = NULL; unsigned int new; @@ -1348,24 +1290,55 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, drbd_info(device, "max BIO size = %u\n", new); } + lim = queue_limits_start_update(q); if (bdev) { - blk_set_stacking_limits(&q->limits); - blk_queue_max_segments(q, - drbd_backing_dev_max_segments(device)); + blk_set_stacking_limits(&lim); + lim.max_segments = drbd_backing_dev_max_segments(device); } else { - blk_queue_max_segments(q, BLK_MAX_SEGMENTS); + lim.max_segments = BLK_MAX_SEGMENTS; } - blk_queue_max_hw_sectors(q, new >> SECTOR_SHIFT); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - decide_on_discard_support(device, bdev); + lim.max_hw_sectors = new >> SECTOR_SHIFT; + lim.seg_boundary_mask = PAGE_SIZE - 1; - if (bdev) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + if (drbd_discard_supported(connection, bdev)) { + lim.discard_granularity = 512; + lim.max_hw_discard_sectors = + drbd_max_discard_sectors(connection); + } else { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); + + if (bdev) + blk_stack_limits(&lim, &b->limits, 0); + + /* + * If we can handle "zeroes" efficiently on the protocol, we want to do + * that, even if our backend does not announce max_write_zeroes_sectors + * itself. 
+ */ + if (connection->agreed_features & DRBD_FF_WZEROES) + lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + lim.max_write_zeroes_sectors = 0; + + if ((lim.discard_granularity >> SECTOR_SHIFT) > + lim.max_hw_discard_sectors) { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; + } + + if (queue_limits_commit_update(q, &lim)) + drbd_err(device, "setting new queue limits failed\n"); } /* Starts the worker thread */ -- cgit v1.2.3 From f98364e926626c678fb4b9004b75cacf92ff0662 Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Tue, 5 Mar 2024 16:20:48 +0800 Subject: aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts This patch is against CVE-2023-6270. The description of cve is: A flaw was found in the ATA over Ethernet (AoE) driver in the Linux kernel. The aoecmd_cfg_pkts() function improperly updates the refcnt on `struct net_device`, and a use-after-free can be triggered by racing between the free on the struct and the access through the `skbtxq` global queue. This could lead to a denial of service condition or potential code execution. In aoecmd_cfg_pkts(), it always calls dev_put(ifp) when skb initial code is finished. But the net_device ifp will still be used in later tx()->dev_queue_xmit() in kthread. Which means that the dev_put(ifp) should NOT be called in the success path of skb initial code in aoecmd_cfg_pkts(). Otherwise tx() may run into use-after-free because the net_device is freed. This patch removed the dev_put(ifp) in the success path in aoecmd_cfg_pkts(), and added dev_put() after skb xmit in tx(). 
Link: https://nvd.nist.gov/vuln/detail/CVE-2023-6270 Fixes: 7562f876cd93 ("[NET]: Rework dev_base via list_head (v3)") Signed-off-by: Chun-Yi Lee Link: https://lore.kernel.org/r/20240305082048.25526-1-jlee@suse.com Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 12 ++++++------ drivers/block/aoe/aoenet.c | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d7317425be51..cc9077b588d7 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu rcu_read_lock(); for_each_netdev_rcu(&init_net, ifp) { dev_hold(ifp); - if (!is_aoe_netif(ifp)) - goto cont; + if (!is_aoe_netif(ifp)) { + dev_put(ifp); + continue; + } skb = new_skb(sizeof *h + sizeof *ch); if (skb == NULL) { printk(KERN_INFO "aoe: skb alloc failure\n"); - goto cont; + dev_put(ifp); + continue; } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; @@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; - -cont: - dev_put(ifp); } rcu_read_unlock(); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index c51ea95bc2ce..923a134fd766 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -63,6 +63,7 @@ tx(int id) __must_hold(&txlock) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? 
ifp->name : "netif", "consider increasing tx_queue_len"); + dev_put(ifp); spin_lock_irq(&txlock); } return 0; -- cgit v1.2.3 From 0e46064ebebb90b02c53283106f26600aa38c986 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 2 Mar 2024 04:26:37 +0900 Subject: virtio_blk: Do not use disk_set_max_open/active_zones() In virtblk_read_zoned_limits(), setting a zoned block device maximum number of open and active zones using the functions disk_set_max_open_zones() and disk_set_max_active_zones() is incorrect as setting the limits for the request queue is now done atomically when the gendisk is created (with blk_mq_alloc_disk()). The value set by the disk_set_max_open/active_zones() functions will be overwritten. Fix this by setting the maximum number of open and active zones directly in the queue_limits structure passed to virtblk_read_zoned_limits(). Fixes: 8b837256560c ("virtio_blk: pass queue_limits to blk_mq_alloc_disk") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240301192639.410183-2-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d8b55874cd59..aa9f86507719 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -732,12 +732,12 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); - disk_set_max_open_zones(vblk->disk, v); + lim->max_open_zones = v; dev_dbg(&vdev->dev, "max open zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v); - disk_set_max_active_zones(vblk->disk, v); + lim->max_active_zones = v; dev_dbg(&vdev->dev, "max active zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, -- cgit v1.2.3 From 34a2cf3fbef17deee2d4d28c41e3cb8ac1929fda Mon 
Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Feb 2024 11:48:26 +0100 Subject: bcache: move calculation of stripe_size and io_opt into bcache_device_init bcache currently calculates the stripe size for the non-cached_dev case directly in bcache_device_init, but for the cached_dev case it does it in the caller. Consolidate it in one place, which also enables setting the io_opt queue_limit before allocating the gendisk so that it can be passed in instead of changing the limit just after the allocation. Signed-off-by: Christoph Hellwig Reviewed-by: Coly Li Link: https://lore.kernel.org/r/20240226104826.283067-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d06a9649d302..f716c3265f5c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -913,6 +913,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, uint64_t n; int idx; + if (cached_bdev) { + d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT; + lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev)); + } if (!d->stripe_size) d->stripe_size = 1 << 31; else if (d->stripe_size < BCH_MIN_STRIPE_SZ) @@ -1418,9 +1422,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); } - dc->disk.stripe_size = q->limits.io_opt >> 9; - - if (dc->disk.stripe_size) + if (bdev_io_opt(dc->bdev)) dc->partial_stripes_expensive = q->limits.raid_partial_stripes_expensive; @@ -1430,9 +1432,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) if (ret) return ret; - blk_queue_io_opt(dc->disk.disk->queue, - max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); - atomic_set(&dc->io_errors, 0); dc->io_disable = false; dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; -- cgit v1.2.3 From
c396b90e502691fc6ff7b43984cfd9d1b15aaa80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:40 -0700 Subject: md: add a mddev_trace_remap helper Add a helper to trace bio remapping that hides some argument dereferences and the check for a DM-mapped MD device. Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-2-hch@lst.de --- drivers/md/md.c | 6 +----- drivers/md/md.h | 8 ++++++++ drivers/md/raid0.c | 5 +---- drivers/md/raid1.c | 11 ++--------- drivers/md/raid10.c | 10 ++-------- drivers/md/raid5.c | 14 +++----------- 6 files changed, 17 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 64d544f09295..a52642ecd394 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -65,7 +65,6 @@ #include #include -#include #include "md.h" #include "md-bitmap.h" #include "md-cluster.h" @@ -8686,10 +8685,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, bio_chain(discard_bio, bio); bio_clone_blkg_association(discard_bio, bio); - if (mddev->gendisk) - trace_block_bio_remap(discard_bio, - disk_devt(mddev->gendisk), - bio->bi_iter.bi_sector); + mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector); submit_bio_noacct(discard_bio); } EXPORT_SYMBOL_GPL(md_submit_discard_bio); diff --git a/drivers/md/md.h b/drivers/md/md.h index 556db28a49aa..928086b638a3 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "md-cluster.h" #define MaxSector (~(sector_t)0) @@ -910,4 +911,11 @@ int do_md_run(struct mddev *mddev); extern const struct block_device_operations md_fops; +static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, + sector_t sector) +{ + if (mddev->gendisk) + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); +} + #endif /* _MD_MD_H */ diff --git a/drivers/md/raid0.c 
b/drivers/md/raid0.c index c50a7abda744..aff094de9743 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -578,10 +578,7 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio) bio_set_dev(bio, tmp_dev->bdev); bio->bi_iter.bi_sector = sector + zone->dev_start + tmp_dev->data_offset; - - if (mddev->gendisk) - trace_block_bio_remap(bio, disk_devt(mddev->gendisk), - bio_sector); + mddev_trace_remap(mddev, bio, bio_sector); mddev_check_write_zeroes(mddev, bio); submit_bio_noacct(bio); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index afca975ec7f3..421154430f24 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1418,11 +1418,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, test_bit(R1BIO_FailFast, &r1_bio->state)) read_bio->bi_opf |= MD_FAILFAST; read_bio->bi_private = r1_bio; - - if (mddev->gendisk) - trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), - r1_bio->sector); - + mddev_trace_remap(mddev, read_bio, r1_bio->sector); submit_bio_noacct(read_bio); } @@ -1655,10 +1651,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_private = r1_bio; atomic_inc(&r1_bio->remaining); - - if (mddev->gendisk) - trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), - r1_bio->sector); + mddev_trace_remap(mddev, mbio, r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_bdev = (void *)rdev; if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8aecdb1ccc16..9335a1620e6c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1235,10 +1235,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, test_bit(R10BIO_FailFast, &r10_bio->state)) read_bio->bi_opf |= MD_FAILFAST; read_bio->bi_private = r10_bio; - - if (mddev->gendisk) - trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), - r10_bio->sector); + mddev_trace_remap(mddev, read_bio, 
r10_bio->sector); submit_bio_noacct(read_bio); return; } @@ -1274,10 +1271,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, && enough(conf, devnum)) mbio->bi_opf |= MD_FAILFAST; mbio->bi_private = r10_bio; - - if (conf->mddev->gendisk) - trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk), - r10_bio->sector); + mddev_trace_remap(mddev, mbio, r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_bdev = (void *)rdev; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 58f8395b56d6..f0bc96f68665 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1295,10 +1295,7 @@ again: if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - if (conf->mddev->gendisk) - trace_block_bio_remap(bi, - disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + mddev_trace_remap(conf->mddev, bi, sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, bi); else @@ -1342,10 +1339,7 @@ again: */ if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; - if (conf->mddev->gendisk) - trace_block_bio_remap(rbi, - disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + mddev_trace_remap(conf->mddev, rbi, sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, rbi); else @@ -5523,9 +5517,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); } - if (mddev->gendisk) - trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), - raid_bio->bi_iter.bi_sector); + mddev_trace_remap(mddev, align_bio, raid_bio->bi_iter.bi_sector); submit_bio_noacct(align_bio); return 1; } -- cgit v1.2.3 From 28be4fd310d146e9a43d7b1bb55cb7e9f5e06e88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:41 -0700 Subject: md: add a mddev_add_trace_msg helper Add a small wrapper around blk_add_trace_msg that hides some argument dereferences and the check for a DM-mapped MD device. 
Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-3-hch@lst.de --- drivers/md/md-bitmap.c | 9 +++------ drivers/md/md.c | 3 +-- drivers/md/md.h | 6 ++++++ drivers/md/raid1.c | 10 ++++------ drivers/md/raid10.c | 15 +++++++-------- drivers/md/raid5.c | 14 +++++++------- 6 files changed, 28 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index a4976ceae868..059afc24c08b 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1046,9 +1046,8 @@ void md_bitmap_unplug(struct bitmap *bitmap) if (dirty || need_write) { if (!writing) { md_bitmap_wait_writes(bitmap); - if (bitmap->mddev->queue) - blk_add_trace_msg(bitmap->mddev->queue, - "md bitmap_unplug"); + mddev_add_trace_msg(bitmap->mddev, + "md bitmap_unplug"); } clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); filemap_write_page(bitmap, i, false); @@ -1319,9 +1318,7 @@ void md_bitmap_daemon_work(struct mddev *mddev) } bitmap->allclean = 1; - if (bitmap->mddev->queue) - blk_add_trace_msg(bitmap->mddev->queue, - "md bitmap_daemon_work"); + mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); /* Any file-page which is PENDING now needs to be written. 
* So set NEEDWRITE now, then after we make any last-minute changes diff --git a/drivers/md/md.c b/drivers/md/md.c index a52642ecd394..ab231537c476 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2856,8 +2856,7 @@ repeat: pr_debug("md: updating %s RAID superblock on device (in sync %d)\n", mdname(mddev), mddev->in_sync); - if (mddev->queue) - blk_add_trace_msg(mddev->queue, "md md_update_sb"); + mddev_add_trace_msg(mddev, "md md_update_sb"); rewrite: md_bitmap_update_sb(mddev->bitmap); rdev_for_each(rdev, mddev) { diff --git a/drivers/md/md.h b/drivers/md/md.h index 928086b638a3..e2e1ddd397ea 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -918,4 +918,10 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); } +#define mddev_add_trace_msg(mddev, fmt, args...) \ +do { \ + if ((mddev)->gendisk) \ + blk_add_trace_msg((mddev)->queue, fmt, ##args); \ +} while (0) + #endif /* _MD_MD_H */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 421154430f24..05870a4565fc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -46,9 +46,6 @@ static void allow_barrier(struct r1conf *conf, sector_t sector_nr); static void lower_barrier(struct r1conf *conf, sector_t sector_nr); -#define raid1_log(md, fmt, args...) 
\ - do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) - #define RAID_1_10_NAME "raid1" #include "raid1-10.c" @@ -1196,7 +1193,7 @@ static void freeze_array(struct r1conf *conf, int extra) */ spin_lock_irq(&conf->resync_lock); conf->array_frozen = 1; - raid1_log(conf->mddev, "wait freeze"); + mddev_add_trace_msg(conf->mddev, "raid1 wait freeze"); wait_event_lock_irq_cmd( conf->wait_barrier, get_unqueued_pending(conf) == extra, @@ -1385,7 +1382,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, * Reading from a write-mostly device must take care not to * over-take any writes that are 'behind' */ - raid1_log(mddev, "wait behind writes"); + mddev_add_trace_msg(mddev, "raid1 wait behind writes"); wait_event(bitmap->behind_wait, atomic_read(&bitmap->behind_writes) == 0); } @@ -1568,7 +1565,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, bio_wouldblock_error(bio); return; } - raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk); + mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked", + blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); wait_barrier(conf, bio->bi_iter.bi_sector, false); goto retry_write; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9335a1620e6c..1447cb1e4414 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -76,9 +76,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); static void end_reshape_write(struct bio *bio); static void end_reshape(struct r10conf *conf); -#define raid10_log(md, fmt, args...) 
\ - do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) - #include "raid1-10.c" #define NULL_CMD @@ -1019,7 +1016,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait) ret = false; } else { conf->nr_waiting++; - raid10_log(conf->mddev, "wait barrier"); + mddev_add_trace_msg(conf->mddev, "raid10 wait barrier"); wait_event_barrier(conf, stop_waiting_barrier(conf)); conf->nr_waiting--; } @@ -1138,7 +1135,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, bio_wouldblock_error(bio); return false; } - raid10_log(conf->mddev, "wait reshape"); + mddev_add_trace_msg(conf->mddev, "raid10 wait reshape"); wait_event(conf->wait_barrier, conf->reshape_progress <= bio->bi_iter.bi_sector || conf->reshape_progress >= bio->bi_iter.bi_sector + @@ -1336,8 +1333,9 @@ retry_wait: if (unlikely(blocked_rdev)) { /* Have to wait for this device to get unblocked, then retry */ allow_barrier(conf); - raid10_log(conf->mddev, "%s wait rdev %d blocked", - __func__, blocked_rdev->raid_disk); + mddev_add_trace_msg(conf->mddev, + "raid10 %s wait rdev %d blocked", + __func__, blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); wait_barrier(conf, false); goto retry_wait; @@ -1392,7 +1390,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio_wouldblock_error(bio); return; } - raid10_log(conf->mddev, "wait reshape metadata"); + mddev_add_trace_msg(conf->mddev, + "raid10 wait reshape metadata"); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f0bc96f68665..c082b07d3f63 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4195,10 +4195,9 @@ static int handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_HANDLE, &sh->state); if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { /* prefer read-modify-write, but need to get some data */ - if 
(conf->mddev->queue) - blk_add_trace_msg(conf->mddev->queue, - "raid5 rmw %llu %d", - (unsigned long long)sh->sector, rmw); + mddev_add_trace_msg(conf->mddev, "raid5 rmw %llu %d", + sh->sector, rmw); + for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (test_bit(R5_InJournal, &dev->flags) && @@ -4276,9 +4275,10 @@ static int handle_stripe_dirtying(struct r5conf *conf, } } if (rcw && conf->mddev->queue) - blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", - (unsigned long long)sh->sector, - rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); + mddev_add_trace_msg(conf->mddev, + "raid5 rcw %llu %d %d %d", + sh->sector, rcw, qread, + test_bit(STRIPE_DELAYED, &sh->state)); } if (rcw > disks && rmw > disks && -- cgit v1.2.3 From 176df894d7974166c65d0cce3b3b019678f9e698 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:42 -0700 Subject: md: add a mddev_is_dm helper Add a helper to check for a DM-mapped MD device instead of using the obfuscated ->gendisk or ->queue NULL checks. 
Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-4-hch@lst.de --- drivers/md/md.c | 15 +++++++-------- drivers/md/md.h | 12 ++++++++++-- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 13 +++++-------- drivers/md/raid10.c | 10 +++++----- drivers/md/raid5.c | 21 ++++++++++----------- 6 files changed, 38 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index ab231537c476..2d93ea127b82 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2410,7 +2410,7 @@ int md_integrity_register(struct mddev *mddev) if (list_empty(&mddev->disks)) return 0; /* nothing to do */ - if (!mddev->gendisk || blk_get_integrity(mddev->gendisk)) + if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk)) return 0; /* shouldn't register, or already is */ rdev_for_each(rdev, mddev) { /* skip spares and non-functional disks */ @@ -2463,7 +2463,7 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) { struct blk_integrity *bi_mddev; - if (!mddev->gendisk) + if (mddev_is_dm(mddev)) return 0; bi_mddev = blk_get_integrity(mddev->gendisk); @@ -5977,7 +5977,7 @@ int md_run(struct mddev *mddev) invalidate_bdev(rdev->bdev); if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) { mddev->ro = MD_RDONLY; - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) set_disk_ro(mddev->gendisk, 1); } @@ -6139,7 +6139,7 @@ int md_run(struct mddev *mddev) } } - if (mddev->queue) { + if (!mddev_is_dm(mddev)) { bool nonrot = true; rdev_for_each(rdev, mddev) { @@ -6404,7 +6404,7 @@ static void mddev_detach(struct mddev *mddev) mddev->pers->quiesce(mddev, 0); } md_unregister_thread(mddev, &mddev->thread); - if (mddev->queue) + if (!mddev_is_dm(mddev)) blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ } @@ -7360,10 +7360,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) if (!rv) { if (mddev_is_clustered(mddev)) 
md_cluster_ops->update_size(mddev, old_dev_sectors); - else if (mddev->queue) { + else if (!mddev_is_dm(mddev)) set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); - } } return rv; } @@ -9177,7 +9176,7 @@ void md_do_sync(struct md_thread *thread) mddev->delta_disks > 0 && mddev->pers->finish_reshape && mddev->pers->size && - mddev->queue) { + !mddev_is_dm(mddev)) { mddev_lock_nointr(mddev); md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); mddev_unlock(mddev); diff --git a/drivers/md/md.h b/drivers/md/md.h index e2e1ddd397ea..d5e99653c690 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -911,16 +911,24 @@ int do_md_run(struct mddev *mddev); extern const struct block_device_operations md_fops; +/* + * MD devices can be used undeneath by DM, in which case ->gendisk is NULL. + */ +static inline bool mddev_is_dm(struct mddev *mddev) +{ + return !mddev->gendisk; +} + static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, sector_t sector) { - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); } #define mddev_add_trace_msg(mddev, fmt, args...) 
\ do { \ - if ((mddev)->gendisk) \ + if (!mddev_is_dm(mddev)) \ blk_add_trace_msg((mddev)->queue, fmt, ##args); \ } while (0) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index aff094de9743..9f787ae77ede 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,7 +399,7 @@ static int raid0_run(struct mddev *mddev) mddev->private = conf; } conf = mddev->private; - if (mddev->queue) { + if (!mddev_is_dm(mddev)) { struct md_rdev *rdev; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 05870a4565fc..dd1393d0f084 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1926,7 +1926,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) for (mirror = first; mirror <= last; mirror++) { p = conf->mirrors + mirror; if (!p->rdev) { - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); @@ -3227,14 +3227,11 @@ static int raid1_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (mddev->queue) + if (!mddev_is_dm(mddev)) { blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - - rdev_for_each(rdev, mddev) { - if (!mddev->gendisk) - continue; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); + rdev_for_each(rdev, mddev) + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); } mddev->degraded = 0; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1447cb1e4414..4021cf06b3a6 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2106,7 +2106,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) continue; } - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); @@ -2126,7 +2126,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) set_bit(Replacement, &rdev->flags); rdev->raid_disk = repl_slot; err = 0; - if (mddev->gendisk) + 
if (!mddev_is_dm(mddev)) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); conf->fullsync = 1; @@ -4014,7 +4014,7 @@ static int raid10_run(struct mddev *mddev) } } - if (mddev->queue) { + if (!mddev_is_dm(conf->mddev)) { blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); raid10_set_io_opt(conf); @@ -4048,7 +4048,7 @@ static int raid10_run(struct mddev *mddev) if (first || diff < min_offset_diff) min_offset_diff = diff; - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); @@ -4933,7 +4933,7 @@ static void end_reshape(struct r10conf *conf) conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); - if (conf->mddev->queue) + if (!mddev_is_dm(conf->mddev)) raid10_set_io_opt(conf); conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c082b07d3f63..f8a81176eff8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2416,12 +2416,12 @@ static int grow_stripes(struct r5conf *conf, int num) size_t namelen = sizeof(conf->cache_name[0]); int devs = max(conf->raid_disks, conf->previous_raid_disks); - if (conf->mddev->gendisk) + if (mddev_is_dm(conf->mddev)) snprintf(conf->cache_name[0], namelen, - "raid%d-%s", conf->level, mdname(conf->mddev)); + "raid%d-%p", conf->level, conf->mddev); else snprintf(conf->cache_name[0], namelen, - "raid%d-%p", conf->level, conf->mddev); + "raid%d-%s", conf->level, mdname(conf->mddev)); snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); conf->active_name = 0; @@ -4274,11 +4274,10 @@ static int handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_DELAYED, &sh->state); } } - if (rcw && conf->mddev->queue) - mddev_add_trace_msg(conf->mddev, - "raid5 rcw %llu %d %d %d", - sh->sector, rcw, qread, - test_bit(STRIPE_DELAYED, &sh->state)); + if (rcw && !mddev_is_dm(conf->mddev)) + blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu 
%d %d %d", + (unsigned long long)sh->sector, + rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); } if (rcw > disks && rmw > disks && @@ -5686,7 +5685,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) } release_inactive_stripe_list(conf, cb->temp_inactive_list, NR_STRIPE_HASH_LOCKS); - if (mddev->queue) + if (!mddev_is_dm(mddev)) trace_block_unplug(mddev->queue, cnt, !from_schedule); kfree(cb); } @@ -7960,7 +7959,7 @@ static int raid5_run(struct mddev *mddev) mdname(mddev)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - if (mddev->queue) { + if (!mddev_is_dm(mddev)) { int chunk_size; /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the @@ -8564,7 +8563,7 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - if (conf->mddev->queue) + if (!mddev_is_dm(conf->mddev)) raid5_set_io_opt(conf); } } -- cgit v1.2.3 From e305fce1883128a9468efe1876a057df48a261d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:43 -0700 Subject: md: add queue limit helpers Add a few helpers that wrap the block queue limits API for use in MD. 
Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-5-hch@lst.de --- drivers/md/md.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ drivers/md/md.h | 3 +++ 2 files changed, 48 insertions(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 2d93ea127b82..7c9fb7dcf0d4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5751,6 +5751,51 @@ static const struct kobj_type md_ktype = { int mdp_major = 0; +/* stack the limit for all rdevs into lim */ +void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim) +{ + struct md_rdev *rdev; + + rdev_for_each(rdev, mddev) { + queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset, + mddev->gendisk->disk_name); + } +} +EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits); + +/* apply the extra stacking limits from a new rdev into mddev */ +int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev) +{ + struct queue_limits lim; + + if (mddev_is_dm(mddev)) + return 0; + + lim = queue_limits_start_update(mddev->queue); + queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset, + mddev->gendisk->disk_name); + return queue_limits_commit_update(mddev->queue, &lim); +} +EXPORT_SYMBOL_GPL(mddev_stack_new_rdev); + +/* update the optimal I/O size after a reshape */ +void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes) +{ + struct queue_limits lim; + + if (mddev_is_dm(mddev)) + return; + + /* don't bother updating io_opt if we can't suspend the array */ + if (mddev_suspend(mddev, false) < 0) + return; + lim = queue_limits_start_update(mddev->gendisk->queue); + lim.io_opt = lim.io_min * nr_stripes; + queue_limits_commit_update(mddev->gendisk->queue, &lim); + mddev_resume(mddev); +} +EXPORT_SYMBOL_GPL(mddev_update_io_opt); + static void mddev_delayed_delete(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); diff --git 
a/drivers/md/md.h b/drivers/md/md.h index d5e99653c690..0d1119be0b50 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -908,6 +908,9 @@ void md_autostart_arrays(int part); int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); int do_md_run(struct mddev *mddev); +void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim); +int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev); +void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes); extern const struct block_device_operations md_fops; -- cgit v1.2.3 From 56cf22d6f672453f8d3392776c9317f217e68dca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:44 -0700 Subject: md/raid0: use the atomic queue limit update APIs Build the queue limits outside the queue and apply them using queue_limits_set. To make the code more obvious also split the queue limits handling into a separate helper function. 
Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-6-hch@lst.de --- drivers/md/raid0.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 9f787ae77ede..f65aa6ecec04 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -379,6 +379,19 @@ static void raid0_free(struct mddev *mddev, void *priv) free_conf(mddev, conf); } +static int raid0_set_limits(struct mddev *mddev) +{ + struct queue_limits lim; + + blk_set_stacking_limits(&lim); + lim.max_hw_sectors = mddev->chunk_sectors; + lim.max_write_zeroes_sectors = mddev->chunk_sectors; + lim.io_min = mddev->chunk_sectors << 9; + lim.io_opt = lim.io_min * mddev->raid_disks; + mddev_stack_rdev_limits(mddev, &lim); + return queue_limits_set(mddev->queue, &lim); +} + static int raid0_run(struct mddev *mddev) { struct r0conf *conf; @@ -400,19 +413,9 @@ static int raid0_run(struct mddev *mddev) } conf = mddev->private; if (!mddev_is_dm(mddev)) { - struct md_rdev *rdev; - - blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); - - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - blk_queue_io_opt(mddev->queue, - (mddev->chunk_sectors << 9) * mddev->raid_disks); - - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - } + ret = raid0_set_limits(mddev); + if (ret) + goto out_free_conf; } /* calculate array device size */ @@ -426,8 +429,10 @@ static int raid0_run(struct mddev *mddev) ret = md_integrity_register(mddev); if (ret) - free_conf(mddev, conf); - + goto out_free_conf; + return 0; +out_free_conf: + free_conf(mddev, conf); return ret; } -- cgit v1.2.3 From 97894f7d3c2966164516a8a5109674763d3a55e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 
Mar 2024 07:01:45 -0700 Subject: md/raid1: use the atomic queue limit update APIs Build the queue limits outside the queue and apply them using queue_limits_set. To make the code more obvious also split the queue limits handling into a separate helper function. Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-7-hch@lst.de --- drivers/md/raid1.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index dd1393d0f084..c34968378377 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1926,12 +1926,11 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) for (mirror = first; mirror <= last; mirror++) { p = conf->mirrors + mirror; if (!p->rdev) { - if (!mddev_is_dm(mddev)) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); + err = mddev_stack_new_rdev(mddev, rdev); + if (err) + return err; raid1_add_conf(conf, rdev, mirror, false); - err = 0; /* As all devices are equivalent, we don't need a full recovery * if this was recently any drive of the array */ @@ -3195,12 +3194,21 @@ static struct r1conf *setup_conf(struct mddev *mddev) return ERR_PTR(err); } +static int raid1_set_limits(struct mddev *mddev) +{ + struct queue_limits lim; + + blk_set_stacking_limits(&lim); + lim.max_write_zeroes_sectors = 0; + mddev_stack_rdev_limits(mddev, &lim); + return queue_limits_set(mddev->queue, &lim); +} + static void raid1_free(struct mddev *mddev, void *priv); static int raid1_run(struct mddev *mddev) { struct r1conf *conf; int i; - struct md_rdev *rdev; int ret; if (mddev->level != 1) { @@ -3228,10 +3236,9 @@ static int raid1_run(struct mddev *mddev) return PTR_ERR(conf); if (!mddev_is_dm(mddev)) { - blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - rdev_for_each(rdev, mddev) - disk_stack_limits(mddev->gendisk, rdev->bdev, - 
rdev->data_offset << 9); + ret = raid1_set_limits(mddev); + if (ret) + goto abort; } mddev->degraded = 0; -- cgit v1.2.3 From f63f17350e537300312e9e19b51f69b61fa44291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:46 -0700 Subject: md/raid5: use the atomic queue limit update APIs Build the queue limits outside the queue and apply them using queue_limits_set. To make the code more obvious also split the queue limits handling into separate helpers. Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-8-hch@lst.de --- drivers/md/raid5.c | 130 ++++++++++++++++++++++++++--------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8a81176eff8..5012069782ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded return 0; } -static void raid5_set_io_opt(struct r5conf *conf) +static int raid5_set_limits(struct mddev *mddev) { - blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * - (conf->raid_disks - conf->max_degraded)); + struct r5conf *conf = mddev->private; + struct queue_limits lim; + int data_disks, stripe; + struct md_rdev *rdev; + + /* + * The read-ahead size must cover two whole stripes, which is + * 2 * (datadisks) * chunksize where 'n' is the number of raid devices. + */ + data_disks = conf->previous_raid_disks - conf->max_degraded; + + /* + * We can only discard a whole stripe. 
It doesn't make sense to + * discard data disk but write parity disk + */ + stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9)); + + blk_set_stacking_limits(&lim); + lim.io_min = mddev->chunk_sectors << 9; + lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded); + lim.raid_partial_stripes_expensive = 1; + lim.discard_granularity = stripe; + lim.max_write_zeroes_sectors = 0; + mddev_stack_rdev_limits(mddev, &lim); + rdev_for_each(rdev, mddev) + queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset, + mddev->gendisk->disk_name); + + /* + * Zeroing is required for discard, otherwise data could be lost. + * + * Consider a scenario: discard a stripe (the stripe could be + * inconsistent if discard_zeroes_data is 0); write one disk of the + * stripe (the stripe could be inconsistent again depending on which + * disks are used to calculate parity); the disk is broken; The stripe + * data of this disk is lost. + * + * We only allow DISCARD if the sysadmin has confirmed that only safe + * devices are in use by setting a module parameter. A better idea + * might be to turn DISCARD into WRITE_ZEROES requests, as that is + * required to be safe. + */ + if (!devices_handle_discard_safely || + lim.max_discard_sectors < (stripe >> 9) || + lim.discard_granularity < stripe) + lim.max_hw_discard_sectors = 0; + + /* + * Requests require having a bitmap for each stripe. + * Limit the max sectors based on this. 
+ */ + lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf); + + /* No restrictions on the number of segments in the request */ + lim.max_segments = USHRT_MAX; + + return queue_limits_set(mddev->queue, &lim); } static int raid5_run(struct mddev *mddev) @@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev) int i; long long min_offset_diff = 0; int first = 1; + int ret = -EIO; if (mddev->recovery_cp != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", @@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); if (!mddev_is_dm(mddev)) { - int chunk_size; - /* read-ahead size must cover two whole stripes, which - * is 2 * (datadisks) * chunksize where 'n' is the - * number of raid devices - */ - int data_disks = conf->previous_raid_disks - conf->max_degraded; - int stripe = data_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - - chunk_size = mddev->chunk_sectors << 9; - blk_queue_io_min(mddev->queue, chunk_size); - raid5_set_io_opt(conf); - mddev->queue->limits.raid_partial_stripes_expensive = 1; - /* - * We can only discard a whole stripe. It doesn't make sense to - * discard data disk but write parity disk - */ - stripe = stripe * PAGE_SIZE; - stripe = roundup_pow_of_two(stripe); - mddev->queue->limits.discard_granularity = stripe; - - blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->new_data_offset << 9); - } - - /* - * zeroing is required, otherwise data - * could be lost. 
Consider a scenario: discard a stripe - * (the stripe could be inconsistent if - * discard_zeroes_data is 0); write one disk of the - * stripe (the stripe could be inconsistent again - * depending on which disks are used to calculate - * parity); the disk is broken; The stripe data of this - * disk is lost. - * - * We only allow DISCARD if the sysadmin has confirmed that - * only safe devices are in use by setting a module parameter. - * A better idea might be to turn DISCARD into WRITE_ZEROES - * requests, as that is required to be safe. - */ - if (!devices_handle_discard_safely || - mddev->queue->limits.max_discard_sectors < (stripe >> 9) || - mddev->queue->limits.discard_granularity < stripe) - blk_queue_max_discard_sectors(mddev->queue, 0); - - /* - * Requests require having a bitmap for each stripe. - * Limit the max sectors based on this. - */ - blk_queue_max_hw_sectors(mddev->queue, - RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf)); - - /* No restrictions on the number of segments in the request */ - blk_queue_max_segments(mddev->queue, USHRT_MAX); + ret = raid5_set_limits(mddev); + if (ret) + goto abort; } if (log_init(conf, journal_dev, raid5_has_ppl(conf))) @@ -8031,7 +8031,7 @@ abort: free_conf(conf); mddev->private = NULL; pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); - return -EIO; + return ret; } static void raid5_free(struct mddev *mddev, void *priv) @@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - if (!mddev_is_dm(conf->mddev)) - raid5_set_io_opt(conf); + mddev_update_io_opt(conf->mddev, + conf->raid_disks - conf->max_degraded); } } -- cgit v1.2.3 From 3d8466ba68d444f5528dcbff106e8bf5c7d51aa0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:47 -0700 Subject: md/raid10: use the atomic queue limit update APIs Build the queue limits outside the queue and apply them using queue_limits_set. 
To make the code more obvious also split the queue limits handling into separate helpers. Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-9-hch@lst.de --- drivers/md/raid10.c | 60 +++++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4021cf06b3a6..e96fdf47319f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2106,10 +2106,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) continue; } - if (!mddev_is_dm(mddev)) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - + err = mddev_stack_new_rdev(mddev, rdev); + if (err) + return err; p->head_position = 0; p->recovery_disabled = mddev->recovery_disabled - 1; rdev->raid_disk = mirror; @@ -2125,10 +2124,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) clear_bit(In_sync, &rdev->flags); set_bit(Replacement, &rdev->flags); rdev->raid_disk = repl_slot; - err = 0; - if (!mddev_is_dm(mddev)) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); + err = mddev_stack_new_rdev(mddev, rdev); + if (err) + return err; conf->fullsync = 1; WRITE_ONCE(p->replacement, rdev); } @@ -3969,14 +3967,26 @@ static struct r10conf *setup_conf(struct mddev *mddev) return ERR_PTR(err); } -static void raid10_set_io_opt(struct r10conf *conf) +static unsigned int raid10_nr_stripes(struct r10conf *conf) { - int raid_disks = conf->geo.raid_disks; + unsigned int raid_disks = conf->geo.raid_disks; + + if (conf->geo.raid_disks % conf->geo.near_copies) + return raid_disks; + return raid_disks / conf->geo.near_copies; +} - if (!(conf->geo.raid_disks % conf->geo.near_copies)) - raid_disks /= conf->geo.near_copies; - blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) * - raid_disks); +static int 
raid10_set_queue_limits(struct mddev *mddev) +{ + struct r10conf *conf = mddev->private; + struct queue_limits lim; + + blk_set_stacking_limits(&lim); + lim.max_write_zeroes_sectors = 0; + lim.io_min = mddev->chunk_sectors << 9; + lim.io_opt = lim.io_min * raid10_nr_stripes(conf); + mddev_stack_rdev_limits(mddev, &lim); + return queue_limits_set(mddev->queue, &lim); } static int raid10_run(struct mddev *mddev) @@ -3988,6 +3998,7 @@ static int raid10_run(struct mddev *mddev) sector_t size; sector_t min_offset_diff = 0; int first = 1; + int ret = -EIO; if (mddev->private == NULL) { conf = setup_conf(mddev); @@ -4014,12 +4025,6 @@ static int raid10_run(struct mddev *mddev) } } - if (!mddev_is_dm(conf->mddev)) { - blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - raid10_set_io_opt(conf); - } - rdev_for_each(rdev, mddev) { long long diff; @@ -4048,14 +4053,16 @@ static int raid10_run(struct mddev *mddev) if (first || diff < min_offset_diff) min_offset_diff = diff; - if (!mddev_is_dm(mddev)) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - disk->head_position = 0; first = 0; } + if (!mddev_is_dm(conf->mddev)) { + ret = raid10_set_queue_limits(mddev); + if (ret) + goto out_free_conf; + } + /* need to check that every block has at least one working mirror */ if (!enough(conf, -1)) { pr_err("md/raid10:%s: not enough operational mirrors.\n", @@ -4156,7 +4163,7 @@ out_free_conf: raid10_free_conf(conf); mddev->private = NULL; out: - return -EIO; + return ret; } static void raid10_free(struct mddev *mddev, void *priv) @@ -4933,8 +4940,7 @@ static void end_reshape(struct r10conf *conf) conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); - if (!mddev_is_dm(conf->mddev)) - raid10_set_io_opt(conf); + mddev_update_io_opt(conf->mddev, raid10_nr_stripes(conf)); conf->fullsync = 0; } -- cgit v1.2.3 From 81a16e19d545fd244ad176f7222d92b67215a33b Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:48 -0700 Subject: md: don't initialize queue limits Initial queue limits are now set from ->run. Remove the superfluous initialization in md_alloc and level_store. Signed-off-by: Christoph Hellwig Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-10-hch@lst.de --- drivers/md/md.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 7c9fb7dcf0d4..4bc47ed2d7a8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4164,7 +4164,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len) mddev->in_sync = 1; del_timer_sync(&mddev->safemode_timer); } - blk_set_stacking_limits(&mddev->queue->limits); pers->run(mddev); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); if (!mddev->thread) @@ -5879,7 +5878,6 @@ struct mddev *md_alloc(dev_t dev, char *name) disk->private_data = mddev; mddev->queue = disk->queue; - blk_set_stacking_limits(&mddev->queue->limits); blk_queue_write_cache(mddev->queue, true, true); disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; -- cgit v1.2.3 From 396799eb5b6f87ec2d759e1a90e179f7058ab9e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2024 07:01:49 -0700 Subject: md: remove mddev->queue Just use the request_queue from the gendisk pointer in the relatively few places that still need it.
Signed-off-by: Christoph Hellwig Reviewed--by: Song Liu Tested-by: Song Liu Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240303140150.5435-11-hch@lst.de --- drivers/md/md.c | 22 ++++++++++++---------- drivers/md/md.h | 5 ++--- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 2 +- drivers/md/raid10.c | 2 +- drivers/md/raid5-ppl.c | 3 ++- drivers/md/raid5.c | 13 +++++++------ 7 files changed, 26 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 4bc47ed2d7a8..7d7b982e369c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5770,10 +5770,10 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev) if (mddev_is_dm(mddev)) return 0; - lim = queue_limits_start_update(mddev->queue); + lim = queue_limits_start_update(mddev->gendisk->queue); queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); - return queue_limits_commit_update(mddev->queue, &lim); + return queue_limits_commit_update(mddev->gendisk->queue, &lim); } EXPORT_SYMBOL_GPL(mddev_stack_new_rdev); @@ -5877,8 +5877,7 @@ struct mddev *md_alloc(dev_t dev, char *name) disk->fops = &md_fops; disk->private_data = mddev; - mddev->queue = disk->queue; - blk_queue_write_cache(mddev->queue, true, true); + blk_queue_write_cache(disk->queue, true, true); disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; error = add_disk(disk); @@ -6183,6 +6182,7 @@ int md_run(struct mddev *mddev) } if (!mddev_is_dm(mddev)) { + struct request_queue *q = mddev->gendisk->queue; bool nonrot = true; rdev_for_each(rdev, mddev) { @@ -6194,14 +6194,14 @@ int md_run(struct mddev *mddev) if (mddev->degraded) nonrot = false; if (nonrot) - blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); else - blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); - blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + 
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q); /* Set the NOWAIT flags if all underlying devices support it */ if (nowait) - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); } if (pers->sync_request) { if (mddev->kobj.sd && @@ -6447,8 +6447,10 @@ static void mddev_detach(struct mddev *mddev) mddev->pers->quiesce(mddev, 0); } md_unregister_thread(mddev, &mddev->thread); + + /* the unplug fn references 'conf' */ if (!mddev_is_dm(mddev)) - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ + blk_sync_queue(mddev->gendisk->queue); } static void __md_stop(struct mddev *mddev) @@ -7166,7 +7168,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) if (!bdev_nowait(rdev->bdev)) { pr_info("%s: Disabling nowait because %pg does not support nowait\n", mdname(mddev), rdev->bdev); - blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue); } /* * Kick recovery, maybe this spare has to be added to the diff --git a/drivers/md/md.h b/drivers/md/md.h index 0d1119be0b50..67e50c44f4b5 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -480,7 +480,6 @@ struct mddev { struct timer_list safemode_timer; struct percpu_ref writes_pending; int sync_checkers; /* # of threads checking writes_pending */ - struct request_queue *queue; /* for plugging ... */ struct bitmap *bitmap; /* the bitmap for the device */ struct { @@ -869,7 +868,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio { if (bio_op(bio) == REQ_OP_WRITE_ZEROES && !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors) - mddev->queue->limits.max_write_zeroes_sectors = 0; + mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0; } static inline int mddev_suspend_and_lock(struct mddev *mddev) @@ -932,7 +931,7 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, #define mddev_add_trace_msg(mddev, fmt, args...) 
\ do { \ if (!mddev_is_dm(mddev)) \ - blk_add_trace_msg((mddev)->queue, fmt, ##args); \ + blk_add_trace_msg((mddev)->gendisk->queue, fmt, ##args); \ } while (0) #endif /* _MD_MD_H */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f65aa6ecec04..c5d4aeb68404 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -389,7 +389,7 @@ static int raid0_set_limits(struct mddev *mddev) lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; mddev_stack_rdev_limits(mddev, &lim); - return queue_limits_set(mddev->queue, &lim); + return queue_limits_set(mddev->gendisk->queue, &lim); } static int raid0_run(struct mddev *mddev) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c34968378377..be8ac24f50b6 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3201,7 +3201,7 @@ static int raid1_set_limits(struct mddev *mddev) blk_set_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; mddev_stack_rdev_limits(mddev, &lim); - return queue_limits_set(mddev->queue, &lim); + return queue_limits_set(mddev->gendisk->queue, &lim); } static void raid1_free(struct mddev *mddev, void *priv); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e96fdf47319f..b0fd3005f5c1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3986,7 +3986,7 @@ static int raid10_set_queue_limits(struct mddev *mddev) lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * raid10_nr_stripes(conf); mddev_stack_rdev_limits(mddev, &lim); - return queue_limits_set(mddev->queue, &lim); + return queue_limits_set(mddev->gendisk->queue, &lim); } static int raid10_run(struct mddev *mddev) diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index da4ba736c4f0..a70cbec12ed0 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1393,7 +1393,8 @@ int ppl_init_log(struct r5conf *conf) ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); ppl_conf->block_size = 512; } else { - ppl_conf->block_size = 
queue_logical_block_size(mddev->queue); + ppl_conf->block_size = + queue_logical_block_size(mddev->gendisk->queue); } for (i = 0; i < ppl_conf->count; i++) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5012069782ba..f2e3c3e2d879 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4275,9 +4275,10 @@ static int handle_stripe_dirtying(struct r5conf *conf, } } if (rcw && !mddev_is_dm(conf->mddev)) - blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", - (unsigned long long)sh->sector, - rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); + blk_add_trace_msg(conf->mddev->gendisk->queue, + "raid5 rcw %llu %d %d %d", + (unsigned long long)sh->sector, rcw, qread, + test_bit(STRIPE_DELAYED, &sh->state)); } if (rcw > disks && rmw > disks && @@ -5686,7 +5687,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) release_inactive_stripe_list(conf, cb->temp_inactive_list, NR_STRIPE_HASH_LOCKS); if (!mddev_is_dm(mddev)) - trace_block_unplug(mddev->queue, cnt, !from_schedule); + trace_block_unplug(mddev->gendisk->queue, cnt, !from_schedule); kfree(cb); } @@ -7089,7 +7090,7 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { - struct request_queue *q = mddev->queue; + struct request_queue *q = mddev->gendisk->queue; conf->skip_copy = new; if (new) @@ -7749,7 +7750,7 @@ static int raid5_set_limits(struct mddev *mddev) /* No restrictions on the number of segments in the request */ lim.max_segments = USHRT_MAX; - return queue_limits_set(mddev->queue, &lim); + return queue_limits_set(mddev->gendisk->queue, &lim); } static int raid5_run(struct mddev *mddev) -- cgit v1.2.3 From 177cddaa5bdfcbc4c3d4594bb44ed8338765fc29 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 6 Mar 2024 16:10:04 +0000 Subject: spi: cs42l43: Don't limit native CS to the first chip select As the chip selects can be configured through ACPI/OF/swnode, and the set_cs() 
callback will only be called when a native chip select is being used, there is no reason for the driver to only support the native chip select as the first chip select. Remove the check that introduces this limitation. Fixes: ef75e767167a ("spi: cs42l43: Add SPI controller support") Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240306161004.2205113-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index b24190526ce9..adf19e8c4c8a 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -148,8 +148,7 @@ static void cs42l43_set_cs(struct spi_device *spi, bool is_high) { struct cs42l43_spi *priv = spi_controller_get_devdata(spi->controller); - if (spi_get_chipselect(spi, 0) == 0) - regmap_write(priv->regmap, CS42L43_SPI_CONFIG2, !is_high); + regmap_write(priv->regmap, CS42L43_SPI_CONFIG2, !is_high); } static int cs42l43_prepare_message(struct spi_controller *ctlr, struct spi_message *msg) -- cgit v1.2.3 From 6601c15c8a0680edb0d23a13151adb8023959149 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Wed, 28 Feb 2024 17:36:28 +0800 Subject: drm/amd/swsmu: modify the gfx activity scaling Add an if condition for gfx activity because the scaling has been changed after smu fw version 5d4600. And remove a warning log. 
Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 2 -- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 4894f7ee737b..6dae5ad74ff0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -229,8 +229,6 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; case IP_VERSION(14, 0, 0): - if ((smu->smc_fw_version < 0x5d3a00)) - dev_warn(smu->adev->dev, "The PMFW version(%x) is behind in this BIOS!\n", smu->smc_fw_version); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 47fdbae4adfc..9310c4758e38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -261,7 +261,10 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->MpipuclkFrequency; break; case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->GfxActivity / 100; + if ((smu->smc_fw_version > 0x5d4600)) + *value = metrics->GfxActivity; + else + *value = metrics->GfxActivity / 100; break; case METRICS_AVERAGE_VCNACTIVITY: *value = metrics->VcnActivity / 100; -- cgit v1.2.3 From e9098cc9aef13bd56e821f628c83f709d3347af1 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 27 Feb 2024 16:08:25 -0300 Subject: drm/amd/display: check dc_link before dereferencing drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:6683 amdgpu_dm_connector_funcs_force() warn: variable dereferenced before 
check 'dc_link' (see line 6663) Fixes: 967176179215 ("drm/amd/display: fix null-pointer dereference on edid reading") Reported-by: Dan Carpenter Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5853cf022917..ec835f0c9192 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6539,7 +6539,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct edid *edid; struct i2c_adapter *ddc; - if (dc_link->aux_mode) + if (dc_link && dc_link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; else ddc = &aconnector->i2c->base; -- cgit v1.2.3 From 937844d661354bf142dc1c621396fdab10ecbacc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Feb 2024 15:59:22 -0500 Subject: drm/amd/display: handle range offsets in VRR ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to check the offset bits for values greater than 255. v2: also update amdgpu_dm_connector values. 
Suggested-by: Mano Ségransan Tested-by: Mano Ségransan Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3203 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ec835f0c9192..1a9bbb04bd5e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11169,14 +11169,23 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; - amdgpu_dm_connector->min_vfreq = range->min_vfreq; - amdgpu_dm_connector->max_vfreq = range->max_vfreq; - amdgpu_dm_connector->pixel_clock_mhz = - range->pixel_clock_mhz * 10; - connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) + connector->display_info.monitor_range.min_vfreq += 255; + if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ) + connector->display_info.monitor_range.max_vfreq += 255; + } + + amdgpu_dm_connector->min_vfreq = + connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = + connector->display_info.monitor_range.max_vfreq; + amdgpu_dm_connector->pixel_clock_mhz = + range->pixel_clock_mhz * 10; + break; } -- cgit v1.2.3 From 0dafaf659cc463f2db0af92003313a8bc46781cd Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 1 Mar 2024 15:36:58 +0800 Subject: drm/amdgpu/pm: Fix the error of pwm1_enable setting Fix the pwm_mode value error which used for pwm1_enable setting Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 +++++++++++- 1 file 
changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 087d57850304..39c5e1dfa275 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2558,6 +2558,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); int err, ret; + u32 pwm_mode; int value; if (amdgpu_in_reset(adev)) @@ -2569,13 +2570,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; + if (value == 0) + pwm_mode = AMD_FAN_CTRL_NONE; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else if (value == 2) + pwm_mode = AMD_FAN_CTRL_AUTO; + else + return -EINVAL; + ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (ret < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; } - ret = amdgpu_dpm_set_fan_control_mode(adev, value); + ret = amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); -- cgit v1.2.3 From a4e7596e209783a7be2727d6b947cbd863c2bbcb Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 6 Mar 2024 15:31:10 -0500 Subject: drm/xe: Return immediately on tile_init failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no reason to proceed with applying workaround and initing sysfs if we are going to abort the probe upon failure. 
Fixes: e5a845fd8fa4 ("drm/xe: Add sysfs entry for tile") Cc: Lucas De Marchi Cc: Matt Roper Cc: Matthew Auld Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240306203110.146387-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit af7b93d1d7eeeef674681ddea875be6a29857a5d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_tile.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 044c20881de7..0650b2fa75ef 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -167,9 +167,10 @@ int xe_tile_init_noalloc(struct xe_tile *tile) goto err_mem_access; tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) + if (IS_ERR(tile->mem.kernel_bb_pool)) { err = PTR_ERR(tile->mem.kernel_bb_pool); - + goto err_mem_access; + } xe_wa_apply_tile_workarounds(tile); xe_tile_sysfs_init(tile); -- cgit v1.2.3 From 4ece8fc439c370b1aec26a44b9f94fb214068d42 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Feb 2024 09:52:26 +0000 Subject: drm/tests/buddy: fix print format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will report a build warning once we have: 806cb2270237 ("kunit: Annotate _MSG assertion variants with gnu printf specifiers"). 
Reported-by: Stephen Rothwell Fixes: c70703320e55 ("drm/tests/drm_buddy: add alloc_range_bias test") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://lore.kernel.org/r/20240229095225.242795-2-matthew.auld@intel.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_buddy_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index 1008d5b9d61e..9ef5b17ae3f6 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -189,7 +189,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) &allocated, DRM_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, size); + bias_start, bias_end, size, ps); bias_rem -= size; /* -- cgit v1.2.3 From ba18deddd6d502da71fd6b6143c53042271b82bd Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Wed, 6 Mar 2024 18:57:14 +0800 Subject: net: pds_core: Fix possible double free in error handling path When auxiliary_device_add() returns an error and auxiliary_device_uninit() is then called, the callback function pdsc_auxbus_dev_release() calls kfree(padev) to free the memory. We shouldn't call kfree(padev) again in the error handling path. Fix this by cleaning up the redundant kfree() and putting the error handling back to where the errors happened.
Fixes: 4569cce43bc6 ("pds_core: add auxiliary_bus devices") Signed-off-by: Yongzhi Liu Reviewed-by: Wojciech Drewek Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240306105714.20597-1-hyperlyzcs@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/pds_core/auxbus.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 11c23a7f3172..fd1a5149c003 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -160,23 +160,19 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, if (err < 0) { dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out; + kfree(padev); + return ERR_PTR(err); } err = auxiliary_device_add(aux_dev); if (err) { dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out_uninit; + auxiliary_device_uninit(aux_dev); + return ERR_PTR(err); } return padev; - -err_out_uninit: - auxiliary_device_uninit(aux_dev); -err_out: - kfree(padev); - return ERR_PTR(err); } int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) -- cgit v1.2.3 From 0dc31b98d7200a0046de5c760feb0aaff6c4b53c Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 7 Mar 2024 19:08:37 +0100 Subject: cdrom: gdrom: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. 
In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240307180837.190626-2-u.kleine-koenig@pengutronix.de Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 9398beeb5d1e..eefdd422ad8e 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -831,7 +831,7 @@ probe_fail_no_mem: return err; } -static int remove_gdrom(struct platform_device *devptr) +static void remove_gdrom(struct platform_device *devptr) { blk_mq_free_tag_set(&gd.tag_set); free_irq(HW_EVENT_GDROM_CMD, &gd); @@ -842,13 +842,11 @@ static int remove_gdrom(struct platform_device *devptr) unregister_cdrom(gd.cd_info); kfree(gd.cd_info); kfree(gd.toc); - - return 0; } static struct platform_driver gdrom_driver = { .probe = probe_gdrom, - .remove = remove_gdrom, + .remove_new = remove_gdrom, .driver = { .name = GDROM_DEV_NAME, }, -- cgit v1.2.3 From fbf8d71742557abaf558d8efb96742d442720cc2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 22 Feb 2024 15:26:54 +0100 Subject: Input: synaptics-rmi4 - fix UAF of IRQ domain on driver removal Calling irq_domain_remove() will lead to freeing the IRQ domain prematurely. The domain is still referenced and will be attempted to get used via rmi_free_function_list() -> rmi_unregister_function() -> irq_dispose_mapping() -> irq_get_irq_data()'s ->domain pointer. With PaX's MEMORY_SANITIZE this will lead to an access fault when attempting to dereference embedded pointers, as in Torsten's report that was faulting on the 'domain->ops->unmap' test. 
Fix this by releasing the IRQ domain only after all related IRQs have been deactivated. Fixes: 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain") Reported-by: Torsten Hilbrich Signed-off-by: Mathias Krause Link: https://lore.kernel.org/r/20240222142654.856566-1-minipli@grsecurity.net Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395..42eaebb3bf5c 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -978,12 +978,12 @@ static int rmi_driver_remove(struct device *dev) rmi_disable_irq(rmi_dev, false); - irq_domain_remove(data->irqdomain); - data->irqdomain = NULL; - rmi_f34_remove_sysfs(rmi_dev); rmi_free_function_list(rmi_dev); + irq_domain_remove(data->irqdomain); + data->irqdomain = NULL; + return 0; } -- cgit v1.2.3 From b7cc4ff787a572edf2c55caeffaa88cd801eb135 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 Feb 2024 16:19:47 +1000 Subject: nouveau: lock the client object tree. It appears the client object tree has no locking unless I've missed something else. Fix races around adding/removing client objects, mostly vram bar mappings. [ 4562.099306] general protection fault, probably for non-canonical address 0x6677ed422bceb80c: 0000 [#1] PREEMPT SMP PTI [ 4562.099314] CPU: 2 PID: 23171 Comm: deqp-vk Not tainted 6.8.0-rc6+ #27 [ 4562.099324] Hardware name: Gigabyte Technology Co., Ltd.
Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021 [ 4562.099330] RIP: 0010:nvkm_object_search+0x1d/0x70 [nouveau] [ 4562.099503] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 48 89 f8 48 85 f6 74 39 48 8b 87 a0 00 00 00 48 85 c0 74 12 <48> 8b 48 f8 48 39 ce 73 15 48 8b 40 10 48 85 c0 75 ee 48 c7 c0 fe [ 4562.099506] RSP: 0000:ffffa94cc420bbf8 EFLAGS: 00010206 [ 4562.099512] RAX: 6677ed422bceb814 RBX: ffff98108791f400 RCX: ffff9810f26b8f58 [ 4562.099517] RDX: 0000000000000000 RSI: ffff9810f26b9158 RDI: ffff98108791f400 [ 4562.099519] RBP: ffff9810f26b9158 R08: 0000000000000000 R09: 0000000000000000 [ 4562.099521] R10: ffffa94cc420bc48 R11: 0000000000000001 R12: ffff9810f02a7cc0 [ 4562.099526] R13: 0000000000000000 R14: 00000000000000ff R15: 0000000000000007 [ 4562.099528] FS: 00007f629c5017c0(0000) GS:ffff98142c700000(0000) knlGS:0000000000000000 [ 4562.099534] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4562.099536] CR2: 00007f629a882000 CR3: 000000017019e004 CR4: 00000000003706f0 [ 4562.099541] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4562.099542] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 4562.099544] Call Trace: [ 4562.099555] [ 4562.099573] ? die_addr+0x36/0x90 [ 4562.099583] ? exc_general_protection+0x246/0x4a0 [ 4562.099593] ? asm_exc_general_protection+0x26/0x30 [ 4562.099600] ? nvkm_object_search+0x1d/0x70 [nouveau] [ 4562.099730] nvkm_ioctl+0xa1/0x250 [nouveau] [ 4562.099861] nvif_object_map_handle+0xc8/0x180 [nouveau] [ 4562.099986] nouveau_ttm_io_mem_reserve+0x122/0x270 [nouveau] [ 4562.100156] ? dma_resv_test_signaled+0x26/0xb0 [ 4562.100163] ttm_bo_vm_fault_reserved+0x97/0x3c0 [ttm] [ 4562.100182] ? 
__mutex_unlock_slowpath+0x2a/0x270 [ 4562.100189] nouveau_ttm_fault+0x69/0xb0 [nouveau] [ 4562.100356] __do_fault+0x32/0x150 [ 4562.100362] do_fault+0x7c/0x560 [ 4562.100369] __handle_mm_fault+0x800/0xc10 [ 4562.100382] handle_mm_fault+0x17c/0x3e0 [ 4562.100388] do_user_addr_fault+0x208/0x860 [ 4562.100395] exc_page_fault+0x7f/0x200 [ 4562.100402] asm_exc_page_fault+0x26/0x30 [ 4562.100412] RIP: 0033:0x9b9870 [ 4562.100419] Code: 85 a8 f7 ff ff 8b 8d 80 f7 ff ff 89 08 e9 18 f2 ff ff 0f 1f 84 00 00 00 00 00 44 89 32 e9 90 fa ff ff 0f 1f 84 00 00 00 00 00 <44> 89 32 e9 f8 f1 ff ff 0f 1f 84 00 00 00 00 00 66 44 89 32 e9 e7 [ 4562.100422] RSP: 002b:00007fff9ba2dc70 EFLAGS: 00010246 [ 4562.100426] RAX: 0000000000000004 RBX: 000000000dd65e10 RCX: 000000fff0000000 [ 4562.100428] RDX: 00007f629a882000 RSI: 00007f629a882000 RDI: 0000000000000066 [ 4562.100432] RBP: 00007fff9ba2e570 R08: 0000000000000000 R09: 0000000123ddf000 [ 4562.100434] R10: 0000000000000001 R11: 0000000000000246 R12: 000000007fffffff [ 4562.100436] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 4562.100446] [ 4562.100448] Modules linked in: nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink cmac bnep sunrpc iwlmvm intel_rapl_msr intel_rapl_common snd_sof_pci_intel_cnl x86_pkg_temp_thermal intel_powerclamp snd_sof_intel_hda_common mac80211 coretemp snd_soc_acpi_intel_match kvm_intel snd_soc_acpi snd_soc_hdac_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof kvm snd_sof_utils snd_soc_core snd_hda_codec_realtek libarc4 snd_hda_codec_generic snd_compress snd_hda_ext_core vfat fat snd_hda_intel snd_intel_dspcfg irqbypass iwlwifi snd_hda_codec snd_hwdep snd_hda_core btusb btrtl mei_hdcp iTCO_wdt rapl mei_pxp btintel snd_seq iTCO_vendor_support btbcm 
snd_seq_device intel_cstate bluetooth snd_pcm cfg80211 intel_wmi_thunderbolt wmi_bmof intel_uncore snd_timer mei_me snd ecdh_generic i2c_i801 [ 4562.100541] ecc mei i2c_smbus soundcore rfkill intel_pch_thermal acpi_pad zram nouveau drm_ttm_helper ttm gpu_sched i2c_algo_bit drm_gpuvm drm_exec mxm_wmi drm_display_helper drm_kms_helper drm crct10dif_pclmul crc32_pclmul nvme e1000e crc32c_intel nvme_core ghash_clmulni_intel video wmi pinctrl_cannonlake ip6_tables ip_tables fuse [ 4562.100616] ---[ end trace 0000000000000000 ]--- Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/include/nvkm/core/client.h | 1 + drivers/gpu/drm/nouveau/nvkm/core/client.c | 1 + drivers/gpu/drm/nouveau/nvkm/core/object.c | 26 +++++++++++++++++----- 3 files changed, 22 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h index 0d9fc741a719..932c9fd0b2d8 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h @@ -11,6 +11,7 @@ struct nvkm_client { u32 debug; struct rb_root objroot; + spinlock_t obj_lock; void *data; int (*event)(u64 token, void *argv, u32 argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c index ebdeb8eb9e77..c55662937ab2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/client.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c @@ -180,6 +180,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg, const char *dbg, client->device = device; client->debug = nvkm_dbgopt(dbg, "CLIENT"); client->objroot = RB_ROOT; + spin_lock_init(&client->obj_lock); client->event = event; INIT_LIST_HEAD(&client->umem); spin_lock_init(&client->lock); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c index 7c554c14e884..aea3ba72027a 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/core/object.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c @@ -30,8 +30,10 @@ nvkm_object_search(struct nvkm_client *client, u64 handle, const struct nvkm_object_func *func) { struct nvkm_object *object; + unsigned long flags; if (handle) { + spin_lock_irqsave(&client->obj_lock, flags); struct rb_node *node = client->objroot.rb_node; while (node) { object = rb_entry(node, typeof(*object), node); @@ -40,9 +42,12 @@ nvkm_object_search(struct nvkm_client *client, u64 handle, else if (handle > object->object) node = node->rb_right; - else + else { + spin_unlock_irqrestore(&client->obj_lock, flags); goto done; + } } + spin_unlock_irqrestore(&client->obj_lock, flags); return ERR_PTR(-ENOENT); } else { object = &client->object; @@ -57,30 +62,39 @@ done: void nvkm_object_remove(struct nvkm_object *object) { + unsigned long flags; + + spin_lock_irqsave(&object->client->obj_lock, flags); if (!RB_EMPTY_NODE(&object->node)) rb_erase(&object->node, &object->client->objroot); + spin_unlock_irqrestore(&object->client->obj_lock, flags); } bool nvkm_object_insert(struct nvkm_object *object) { - struct rb_node **ptr = &object->client->objroot.rb_node; + struct rb_node **ptr; struct rb_node *parent = NULL; + unsigned long flags; + spin_lock_irqsave(&object->client->obj_lock, flags); + ptr = &object->client->objroot.rb_node; while (*ptr) { struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node); parent = *ptr; - if (object->object < this->object) + if (object->object < this->object) { ptr = &parent->rb_left; - else - if (object->object > this->object) + } else if (object->object > this->object) { ptr = &parent->rb_right; - else + } else { + spin_unlock_irqrestore(&object->client->obj_lock, flags); return false; + } } rb_link_node(&object->node, parent, ptr); rb_insert_color(&object->node, &object->client->objroot); + spin_unlock_irqrestore(&object->client->obj_lock, flags); return true; } -- cgit v1.2.3 From 
09f02902eb9cd41d4b88f4a5b93696297b57a3b0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 3 Mar 2024 11:45:22 +0100 Subject: i2c: i801: Fix using mux_pdev before it's set i801_probe_optional_slaves() is called before i801_add_mux(). This results in mux_pdev being checked before it's set by i801_add_mux(). Fix this by changing the order of the calls. I consider this safe as I see no dependencies. Fixes: 80e56b86b59e ("i2c: i801: Simplify class-based client device instantiation") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-i801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 2c36b36d7d51..9a0a77383ca8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1742,9 +1742,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) i801_enable_host_notify(&priv->adapter); - i801_probe_optional_slaves(priv); /* We ignore errors - multiplexing is optional */ i801_add_mux(priv); + i801_probe_optional_slaves(priv); pci_set_drvdata(dev, priv); -- cgit v1.2.3 From ceb013b2d9a2946035de5e1827624edc85ae9484 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 4 Mar 2024 21:31:06 +0100 Subject: i2c: i801: Avoid potential double call to gpiod_remove_lookup_table If registering the platform device fails, the lookup table is removed in the error path. On module removal we would try to remove the lookup table again. Fix this by setting priv->lookup only if registering the platform device was successful. In addition free the memory allocated for the lookup table in the error path. 
Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Reviewed-by: Linus Walleij Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-i801.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9a0a77383ca8..274e987e4cfa 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1416,7 +1416,6 @@ static void i801_add_mux(struct i801_priv *priv) lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip, mux_config->gpios[i], "mux", 0); gpiod_add_lookup_table(lookup); - priv->lookup = lookup; /* * Register the mux device, we use PLATFORM_DEVID_NONE here @@ -1430,7 +1429,10 @@ static void i801_add_mux(struct i801_priv *priv) sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { gpiod_remove_lookup_table(lookup); + devm_kfree(dev, lookup); dev_err(dev, "Failed to register i2c-mux-gpio device\n"); + } else { + priv->lookup = lookup; } } -- cgit v1.2.3 From 97fd62e3269d2d47cefd421ffe144f9eafab8315 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 5 Jan 2024 15:39:35 +0100 Subject: i2c: wmt: Fix an error handling path in wmt_i2c_probe() wmt_i2c_reset_hardware() calls clk_prepare_enable(). So, should an error occur after it, it should be undone by a corresponding clk_disable_unprepare() call, as already done in the remove function. 
Fixes: 560746eb79d3 ("i2c: vt8500: Add support for I2C bus on Wondermedia SoCs") Signed-off-by: Christophe JAILLET Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-wmt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/i2c/busses/i2c-wmt.c b/drivers/i2c/busses/i2c-wmt.c index ec2a8da134e5..198afee5233c 100644 --- a/drivers/i2c/busses/i2c-wmt.c +++ b/drivers/i2c/busses/i2c-wmt.c @@ -378,11 +378,15 @@ static int wmt_i2c_probe(struct platform_device *pdev) err = i2c_add_adapter(adap); if (err) - return err; + goto err_disable_clk; platform_set_drvdata(pdev, i2c_dev); return 0; + +err_disable_clk: + clk_disable_unprepare(i2c_dev->clk); + return err; } static void wmt_i2c_remove(struct platform_device *pdev) -- cgit v1.2.3 From ac168d6770aa12ee201c7474e1361810d5fc723a Mon Sep 17 00:00:00 2001 From: Tommy Huang Date: Tue, 5 Mar 2024 09:19:06 +0800 Subject: i2c: aspeed: Fix the dummy irq expected print When the i2c error condition occurred and master state was not idle, the master irq function will goto complete state without any other interrupt handling. It would cause dummy irq expected print. Under this condition, assign the irq_status into irq_handle. For example, when the abnormal start / stop occurred (bit 5) with normal stop status (bit 4) at same time. Then the normal stop status would not be handled and it would cause irq expected print in the aspeed_i2c_bus_irq. ... aspeed-i2c-bus x. i2c-bus: irq handled != irq. Expected 0x00000030, but was 0x00000020 ... 
Fixes: 3e9efc3299dd ("i2c: aspeed: Handle master/slave combined irq events properly") Cc: Jae Hyun Yoo Signed-off-by: Tommy Huang Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-aspeed.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 5511fd46a65e..ce8c4846b7fa 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -445,6 +445,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) irq_status); irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { + irq_handled = irq_status; bus->cmd_err = ret; bus->master_state = ASPEED_I2C_MASTER_INACTIVE; goto out_complete; -- cgit v1.2.3 From 4527a2194e7c2f88e940f9071084daa307ce08af Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 8 Mar 2024 09:51:06 +0100 Subject: EDAC/versal: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve this, there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shubhrajyoti Datta Link: https://lore.kernel.org/r/83deca1ce260f7e17ff3cb106c9a6946d4ca4505.1709886922.git.u.kleine-koenig@pengutronix.de --- drivers/edac/versal_edac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 3016870689f1..1688a5050f63 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -1160,7 +1160,7 @@ free_edac_mc: return rc; } -static int mc_remove(struct platform_device *pdev) +static void mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); struct edac_priv *priv = mci->pvt_info; @@ -1178,8 +1178,6 @@ static int mc_remove(struct platform_device *pdev) XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - - return 0; } static struct platform_driver xilinx_ddr_edac_mc_driver = { @@ -1188,7 +1186,7 @@ static struct platform_driver xilinx_ddr_edac_mc_driver = { .of_match_table = xlnx_edac_match, }, .probe = mc_probe, - .remove = mc_remove, + .remove_new = mc_remove, }; module_platform_driver(xilinx_ddr_edac_mc_driver); -- cgit v1.2.3 From d8d6608b76b98b7b88795a529d3d910ac9ef05f4 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 8 Mar 2024 09:51:04 +0100 Subject: block/swim: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. 
In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/a00aea8201ea85ae726411bb0fb015ea026ff40a.1709886922.git.u.kleine-koenig@pengutronix.de Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 16bdf62067d8..6731678f3a41 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -916,7 +916,7 @@ out: return ret; } -static int swim_remove(struct platform_device *dev) +static void swim_remove(struct platform_device *dev) { struct swim_priv *swd = platform_get_drvdata(dev); int drive; @@ -937,13 +937,11 @@ static int swim_remove(struct platform_device *dev) release_mem_region(res->start, resource_size(res)); kfree(swd); - - return 0; } static struct platform_driver swim_driver = { .probe = swim_probe, - .remove = swim_remove, + .remove_new = swim_remove, .driver = { .name = CARDNAME, }, -- cgit v1.2.3 From bff4b74625fea851f9dd61e747a162d2f6b3317e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 11 Mar 2024 17:11:28 -0700 Subject: Revert "dm: use queue_limits_set" This reverts commit 8e0ef412869430d114158fc3b9b1fb111e247bd3. It's broken, and causes the boot to fail on encrypted volumes. 
Reported-and-bisected-by: Johannes Weiner Link: https://lore.kernel.org/all/20240311235023.GA1205@cmpxchg.org/ Acked-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/blk-settings.c | 2 +- drivers/md/dm-table.c | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/block/blk-settings.c b/block/blk-settings.c index 3c7d8d638ab5..e160d56e8eda 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -267,7 +267,7 @@ int queue_limits_commit_update(struct request_queue *q, EXPORT_SYMBOL_GPL(queue_limits_commit_update); /** - * queue_limits_set - apply queue limits to queue + * queue_limits_commit_set - apply queue limits to queue * @q: queue to update * @lim: limits to apply * diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 88114719fe18..41f1d731ae5a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1963,27 +1963,26 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, bool wc = false, fua = false; int r; + /* + * Copy table's limits to the DM device's request_queue + */ + q->limits = *limits; + if (dm_table_supports_nowait(t)) blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); else blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); if (!dm_table_supports_discards(t)) { - limits->max_hw_discard_sectors = 0; - limits->discard_granularity = 0; - limits->discard_alignment = 0; - limits->discard_misaligned = 0; + q->limits.max_discard_sectors = 0; + q->limits.max_hw_discard_sectors = 0; + q->limits.discard_granularity = 0; + q->limits.discard_alignment = 0; + q->limits.discard_misaligned = 0; } - if (!dm_table_supports_write_zeroes(t)) - limits->max_write_zeroes_sectors = 0; - if (!dm_table_supports_secure_erase(t)) - limits->max_secure_erase_sectors = 0; - - r = queue_limits_set(q, limits); - if (r) - return r; + q->limits.max_secure_erase_sectors = 0; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; @@ -2008,6 +2007,9 @@ int 
dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + if (!dm_table_supports_write_zeroes(t)) + q->limits.max_write_zeroes_sectors = 0; + dm_table_verify_integrity(t); /* @@ -2045,6 +2047,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } dm_update_crypto_profile(q, t); + disk_update_readahead(t->md->disk); /* * Check for request-based device is left to -- cgit v1.2.3